Here is the last script I used. Someone may be interested in adapting it to their own setup. It helped me a lot in speeding up the cleanup of duplicates. It displays a diff that makes it possible to check whether two similar notes are semantically identical. When a merge was necessary, I used Meld and did the merge manually in Joplin. I was able to clean up about 5000 duplicates in roughly 10-15 hours.
import difflib
from joppy.api import Api
# Create the Joplin Data API client.
# The token is read from the JOPLIN_TOKEN environment variable instead of being
# written into the script: whoever holds the token can read and modify every
# note, so it must not be published together with the code. A token that has
# already been shared publicly should be regenerated in Joplin.
import os
joplin_token = os.environ.get('JOPLIN_TOKEN')
if not joplin_token:
    raise SystemExit('Set the JOPLIN_TOKEN environment variable to your Joplin API token.')
api = Api(token=joplin_token)
import getkey
import re
# Titles of the notebooks involved in the merge. Set them by hand; titles are
# case sensitive. Some of these names are assigned again further down.

# The two notebooks in which the duplicated notes are to be found.
notebook1name, notebook2name = 'Google_keep_desktop', 'Google_keep_laptop'

# Notebook that receives the notes that should, in principle, be deleted.
notebook_of_tobedeletednotes_name = 'Google_keep_to_be_deleted'

# Notebook that receives the notes that are kept.
notebook_of_analizednotes_name = 'Google_keep_analized_notes'
# Fetch the id and title of every notebook. The result is a list of dicts:
#   notebooklist[0]['id']    -> id of the first notebook
#   notebooklist[0]['title'] -> title of the first notebook
notebooklist = api.get_notebooks(fields='id,title')['items']

# Print the titles with their position so the user can see which notebook to
# check for duplicated notes and which notebook should receive a note once it
# has been handled (the "analized notes" notebook).
print("list of notebooks are the following:\n")
for i, notebookdic in enumerate(notebooklist):
    print(i, ' ', notebookdic['title'])
# Notebook titles actually used for this run.
# Asking for the titles interactively is disabled; to type them at run time
# instead of editing the script, re-enable the prompts below:
#notebook1name=input('give the name of the notebook in which you want to check if notes have duplicates:\n')
#notebook_of_analizednotes_name=input('give the name of the notebook in which you will put the notes that will be kept:\n')
#notebook_of_tobedeletednotes_name=input('give the name of the notebook in which you will put the notes that will not be kept:\n')
(
    notebook1name,
    notebook_of_analizednotes_name,
    notebook_of_tobedeletednotes_name,
) = (
    'Google_Keep_desktop_attachement',
    'Google_Keep_laptop_analized_attachement',
    'Google_keep_to_be_deleted',
)
########## (author's marker, translated from Portuguese: "stopped here on 08/11/2021")
# Resolve the three configured notebook titles to their notebook records
# (the dicts with 'id' and 'title' held in notebooklist).
# When several notebooks share a title the last one in notebooklist is used.
notebooks_by_title = {notebook['title']: notebook for notebook in notebooklist}

# Stop with a readable message instead of failing later on an undefined name
# when one of the titles does not exist.
missing_titles = [
    title
    for title in (
        notebook1name,
        notebook_of_analizednotes_name,
        notebook_of_tobedeletednotes_name,
    )
    if title not in notebooks_by_title
]
if missing_titles:
    raise SystemExit(
        "notebook(s) not found (titles are case sensitive): " + ", ".join(missing_titles)
    )

notebook1 = notebooks_by_title[notebook1name]
notebook_of_analizednotes = notebooks_by_title[notebook_of_analizednotes_name]
notebook_of_tobedeletednotes = notebooks_by_title[notebook_of_tobedeletednotes_name]

print("notebook1 is", str(notebook1),"\n")
print("notebook_of_analizednotes is", str(notebook_of_analizednotes),"\n")
print("notebook_of_tobedeletednotes is", str(notebook_of_tobedeletednotes),"\n")
# Interactive duplicate clean-up.
#
# Every note of notebook1 is used to build a search phrase (its longest line).
# Depending on how many notes the search returns:
#   1 note   -> no duplicate was found: the note goes to the "analized" notebook
#   2 notes  -> a diff of both bodies is shown and the user decides which one
#               is kept and which one goes to the "to be deleted" notebook
#   >2 notes -> the user first picks which two notes to compare
#
# Relies on names defined earlier in the script: api, getkey, difflib,
# notebook1, notebook_of_analizednotes and notebook_of_tobedeletednotes.
#
# TODO: idea from the original author that is not implemented yet: when more
# than two notes match, cycle through them and retry the search with each
# note's own longest line to narrow the result down to two notes.

# Characters replaced by a blank before the search phrase is chosen.
# Presumably they interfere with the search query syntax -- TODO confirm.
SEARCH_NOISE = str.maketrans(dict.fromkeys('()[]"&#%.', ' '))

# Ids of the two notebooks whose notes still have to be analysed.
# NOTE(review): these ids are specific to the original author's Joplin profile;
# replace them with the ids of your own source notebooks.
PENDING_NOTEBOOK_IDS = frozenset((
    'dfa6f637732f4d318b4fcf21b05aa6af',
    '8b31a7b0486a453fb425516432a96225',
))

# Menu shown when more than two notes match the search.
PICK_MENU = """
choose your option :
Press Enter twice -> do nothing
Press numbers of two notes -> compare these notes
"""

# Menu shown after the diff of two notes has been printed.
PAIR_MENU = """
choose your option:
1: note 1 -> analized notebook
note 2 -> tobedeleted notebook
analize next note
2: note 2 -> analized notebook
note 1 -> tobedeleted notebook
analize next note
3: do not modify note and analize next note
analize next note
"""


def build_search_query(body):
    """Return the quoted search phrase for a note body, or None.

    The phrase is the longest line of *body* after the characters listed in
    SEARCH_NOISE have been replaced by blanks, wrapped in double quotes so
    that it is searched as one phrase. When several lines share the maximum
    length the first one is used. None is returned when the longest line is
    empty or whitespace only, because such a phrase cannot identify a note.
    """
    longest = max(body.translate(SEARCH_NOISE).split('\n'), key=len)
    if not longest.strip():
        return None
    return '"' + longest + '"'


def diff_bodies(first_body, second_body):
    """Return a line-by-line difflib.Differ comparison of two note bodies."""
    differ = difflib.Differ()
    return '\n'.join(differ.compare(first_body.splitlines(), second_body.splitlines()))


def read_index(key, size):
    """Return *key* as an index into a list of *size* items, or None.

    None is returned when the pressed key is not a number or when the number
    does not designate one of the listed items.
    """
    try:
        index = int(key)
    except ValueError:
        return None
    return index if 0 <= index < size else None


def review_pair(first_id, second_id):
    """Print the diff of two notes and move them as the user chooses.

    Key '1' keeps note 1 (moved to the analized notebook) and discards note 2
    (moved to the to-be-deleted notebook); key '2' does the opposite; key '3'
    leaves both notes where they are. Any other key shows the menu again.
    """
    note1 = api.get_note(id_=first_id, fields='id,title,body')
    note2 = api.get_note(id_=second_id, fields='id,title,body')
    print("\n\n\n\n\n\n----------------- new comparison of possibly identical notes---------------\n")
    print(diff_bodies(note1['body'], note2['body']))
    print("\n---------------- end of comprison of possibly identical notes---------------\n\n")
    while True:
        print(PAIR_MENU)
        choice = getkey.getkey()
        if choice == '1':
            print('note 1 -> analized notebook')
            api.modify_note(id_=note1['id'], parent_id=notebook_of_analizednotes['id'])
            print('note 2 -> tobedeleted notebook')
            api.modify_note(id_=note2['id'], parent_id=notebook_of_tobedeletednotes['id'])
            print('analizing next note')
            return
        if choice == '2':
            print('note 1 -> tobedeleted notebook')
            api.modify_note(id_=note1['id'], parent_id=notebook_of_tobedeletednotes['id'])
            print('note 2 -> analized notebook')
            api.modify_note(id_=note2['id'], parent_id=notebook_of_analizednotes['id'])
            print('analizing next note')
            return
        if choice == '3':
            return


def triage_many(matches):
    """Let the user deal with a search that returned more than two notes.

    *matches* is the list of search results; it is modified in place when the
    user drops an entry. Only the notes that live in PENDING_NOTEBOOK_IDS are
    listed with a number. The user then presses two keys:
      - Enter (first key): do nothing;
      - two different numbers: compare these two notes with review_pair();
      - the same number twice: remove that note from *matches*, so that the
        caller can fall back to the two-note comparison if two notes remain.
    """
    print("they are more than 2 notes at this point")
    # Keep only the notes located in the notebooks that still have to be analysed.
    pending = [item for item in matches if item['parent_id'] in PENDING_NOTEBOOK_IDS]
    print("identical notes that have not yet been analized:")
    for index, candidate in enumerate(pending):
        print(index, candidate)
    while True:
        print(PICK_MENU)
        key1 = getkey.getkey()
        key2 = getkey.getkey()
        # Assumes getkey reports the Enter key as '\n' -- TODO confirm per platform.
        if key1 == '\n':
            return
        first = read_index(key1, len(pending))
        second = read_index(key2, len(pending))
        if first is None or second is None:
            # Ask again instead of crashing on a key that is not a listed number.
            print("please press the numbers of two listed notes")
            continue
        if first != second:
            review_pair(pending[first]['id'], pending[second]['id'])
            return
        # Same number pressed twice: drop that note from the search results.
        matches.remove(pending[first])
        print("list of identical notes are now:")
        for item in matches:
            print(item)
        return


def process_note(note):
    """Search for duplicates of *note* and handle the result interactively."""
    query = build_search_query(note['body'])
    if query is None:
        print("skipping note without searchable text:", note['title'])
        return
    print("searching for:", query)
    matches = api.search(query=query)['items']
    print("number of notes:", len(matches))
    # Print all notes in order to see the note titles.
    print("identical notes:")
    for match in matches:
        print(match)
    if len(matches) == 1:
        # A single hit means no duplicate: move it to the analized notebook.
        api.modify_note(id_=matches[0]['id'], parent_id=notebook_of_analizednotes['id'])
    if len(matches) > 2:
        triage_many(matches)
    # Also reached when triage_many() reduced the results to two notes.
    if len(matches) == 2:
        review_pair(matches[0]['id'], matches[1]['id'])


def main():
    """Process every note of notebook1."""
    for note in api.get_all_notes(notebook_id=notebook1['id'], fields='id,title,body'):
        process_note(note)


if __name__ == "__main__":
    main()