├── .gitignore ├── README.md ├── get_hypothesis_notes.py └── get_pinboard_notes.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .env 3 | annotations.json 4 | getnotes.sh 5 | secrets.py 6 | src/ 7 | 8 | *.DS_Store 9 | 10 | # Consider removing already checked-in checkpoints? 11 | .ipynb_checkpoints 12 | 13 | #active-learning-changing-environments/etc/ 14 | 15 | # remove .pyc files 16 | *.pyc 17 | 18 | # remove .p pickle files 19 | *.p 20 | 21 | # remove np.save files 22 | *.npy 23 | 24 | # Items below taken from github's tex gitignore 25 | 26 | ## Core latex/pdflatex auxiliary files: 27 | *.aux 28 | *.lof 29 | *.log 30 | *.lot 31 | *.fls 32 | *.out 33 | *.toc 34 | *.fmt 35 | *.fot 36 | *.cb 37 | *.cb2 38 | 39 | ## Intermediate documents: 40 | *.dvi 41 | *.nav 42 | *.snm 43 | *-converted-to.* 44 | # these rules might exclude image files for figures etc. 45 | 46 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 47 | *.bbl 48 | *.bcf 49 | *.blg 50 | *-blx.aux 51 | *-blx.bib 52 | *.brf 53 | *.run.xml 54 | 55 | ## Build tool auxiliary files: 56 | *.fdb_latexmk 57 | *.synctex 58 | *.synctex(busy) 59 | *.synctex.gz 60 | *.synctex.gz(busy) 61 | *.pdfsync 62 | 63 | # endfloat 64 | *.ttt 65 | *.fff 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Annotation tools 2 | 3 | ## exporting hypothesis annotations to obsidian (markdown files) 4 | 5 | connecting [hypothesis](https://hypothes.is) and [obsidian.md](obsidian.md). Inspired by the tutorial from [Shawn Graham](https://electricarchaeology.ca/2021/02/14/from-hypothesis-annotation-to-obsidian-note/). 6 | 7 | First, get ‘Hypexport’ from https://github.com/karlicoss/hypexport . 8 | 9 | Install it with 10 | 11 | ``` pip install --user git+https://github.com/karlicoss/hypexport ``` 12 | 13 | Then, create a new text file; call it secrets.py and put into it your Hypothesis username and your developer token: 14 | 15 | ``` 16 | username = "USERNAME" 17 | token = "TOKEN" 18 | ``` 19 | 20 | ### Getting annotations 21 | Grab all of your annotations with: 22 | 23 | ``` python -m hypexport.export --secrets secrets.py > annotations.json ``` 24 | 25 | ### Creating markdown files 26 | 27 | create an out directory for the markdown notes. 28 | 29 | ```mmkdir out``` 30 | 31 | create a ```.env``` file and add the date you want to pull annotations from. 32 | This environmment variable will be updated every time the function is called. 33 | 34 | ```hypothesis_last_pull="2021-02-25" ``` 35 | 36 | 37 | Then run ```python get_hypothesis_notes.py``` script. 38 | 39 | --- 40 | 41 | ## Exporting pinboard notes 42 | 43 | add pinboard API key to ```secrets.py``` file 44 | 45 | ``` pinboard_key = 'username:key-number' ``` 46 | 47 | 48 | add last pull variable to the ```.env``` file, e.g.: 49 | 50 | ```pinboard_last_pull="2021-02-25" ``` 51 | 52 | then run: 53 | 54 | ``` python get_pinboard_notes.py ``` 55 | 56 | ---- 57 | to do: 58 | 59 | - add #to-read tag to "read later" pinboard bookmarks 60 | - think about Maps of Content 61 | - tag hierarchies 62 | - tag disambiguation / suggestions / + bundles 63 | - think about [[links]] between pages (beyond tags) 64 | - look into images 65 | -------------------------------------------------------------------------------- /get_hypothesis_notes.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv, find_dotenv, set_key 3 | load_dotenv() 4 | 5 | import json 6 | import string 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from dateutil.parser import parse 12 | 13 | ROOT = os.path.dirname(os.path.abspath(__file__)) 14 | 15 | with open('annotations.json', 'r', encoding='utf-8') as j: 16 | contents = json.loads(j.read()) 17 | 18 | 19 | # pull last import date from .env file 20 | hypothesis_last_pull = os.getenv('hypothesis_last_pull') 21 | 22 | print("hypothesis - Last date pulled: ", hypothesis_last_pull) 23 | 24 | # if None, set to 1990 25 | if hypothesis_last_pull is None: 26 | hypothesis_last_pull = "1990-01-01" 27 | 28 | # filter annotations based on last pulled date 29 | contents['annotations'] = [i for i in contents['annotations'] if parse(i['updated'][:10])>=parse(hypothesis_last_pull)] 30 | 31 | print("new notes: ", len(contents['annotations'])) 32 | 33 | 34 | all_notes =[] 35 | 36 | # extract annotation info from each hypothesis document 37 | for i in range(len(contents['annotations'])): 38 | 39 | anno = contents['annotations'][i] 40 | 41 | created = anno['updated'] 42 | if len(anno['document'])==0: 43 | title = created[:10]+"-"+"no-title" 44 | else: 45 | title = anno['document']['title'][0] 46 | title = title.translate(str.maketrans('', '', string.punctuation)).lower() 47 | title = (created[:10]+"-"+title).replace(" ", "-") 48 | 49 | context_href = anno['links']['incontext'] 50 | uri = anno['uri'] 51 | tags = anno['tags'] 52 | try: 53 | highlights = [i['exact'] for i in anno['target'][0]['selector'] if 'exact' in i.keys()][0] 54 | except: 55 | print(i) 56 | print(anno['target']) 57 | 58 | note = anno['text'] 59 | 60 | n ={} 61 | 62 | date = created[:10] 63 | tags = "#"+' #'.join(tags) 64 | url = context_href 65 | title = title 66 | 67 | n['title'] = title 68 | n['tags'] = tags 69 | n['date'] = date 70 | n['url'] = url 71 | n['highlights'] = "> " +highlights +" s\n" +note 72 | n['uri'] = uri 73 | 74 | all_notes.append(n) 75 | 76 | # create dataframe and unify notes with same title / date pairs 77 | df = pd.DataFrame(all_notes).groupby(["title","date","uri"])['highlights'].apply(list).reset_index(name='highlights') 78 | df['tags'] = pd.DataFrame(all_notes).groupby(["title","date"])['tags'].apply(lambda x: list(np.unique(x))).reset_index(name='tags')['tags'].values 79 | df['url'] = pd.DataFrame(all_notes).groupby(["title","date"])['url'].apply(lambda x: list(np.unique(x))).reset_index(name='url')['url'].values 80 | 81 | # update last pull in .env file 82 | last_pull = max(df['date']) 83 | print(last_pull) 84 | 85 | # bundle new notes to unique folder 86 | base_path = os.path.join(ROOT, "out", "hypothesis", last_pull) 87 | os.makedirs(base_path, exist_ok=True) 88 | 89 | # create markdown files for each document with highlights and notes 90 | for i,note_file in df.iterrows(): 91 | 92 | title = note_file['title'] 93 | tags = note_file['tags'] 94 | date = note_file['date'] 95 | uri = note_file['uri'] 96 | 97 | with open(os.path.join(base_path, title+'.md'), 'w', encoding='utf-8') as out: 98 | title_line= "# "+title[11:]+"\n\n" 99 | tag_line = "tags: "+ " ".join([i for i in " ".join(tags).split(" ") if len(i)>1])+"\n" 100 | uri_line = "uri: ["+title[11:]+"]("+uri+")\n" 101 | date_line = "date: "+ date+"\n" 102 | 103 | high_line = "" 104 | for index,high in enumerate(note_file['highlights']): 105 | high_line += high +"\n" 106 | high_line += "[hypothesis ref]("+note_file['url'][0]+")\n\n ----\n" 107 | 108 | out.writelines([title_line, 109 | tag_line, 110 | uri_line, 111 | date_line, 112 | "### highlight:\n",high_line]) 113 | 114 | dotenv_file = find_dotenv() 115 | set_key(dotenv_file, "hypothesis_last_pull", last_pull) 116 | 117 | -------------------------------------------------------------------------------- /get_pinboard_notes.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv, find_dotenv, set_key 3 | load_dotenv() 4 | 5 | from dateutil.parser import parse 6 | 7 | import pandas as pd 8 | import json 9 | import string 10 | 11 | import pinboard 12 | 13 | import secrets as se 14 | 15 | pk = se.pinboard_key 16 | pb = pinboard.Pinboard(pk) 17 | 18 | out = pb.posts.all(parse_response=False) 19 | out = json.loads(out.read().decode("utf-8")) 20 | 21 | # pull last import date from .env file 22 | pinboard_last_pull = os.getenv('pinboard_last_pull') 23 | print("pinboard last date pulled: ", pinboard_last_pull) 24 | 25 | # if None, set to 1990 26 | if pinboard_last_pull is None: 27 | pinboard_last_pull = "1990-01-01" 28 | 29 | # filter bookmarks based on last pulled date 30 | out = [i for i in out if parse(i['time'][:10])>=parse(pinboard_last_pull)] 31 | print ("new notes: ", len(out)) 32 | 33 | assert len(out)>0 34 | 35 | # update last pull in .env file 36 | last_pull = pd.DataFrame(out).time.max()[:10] 37 | 38 | dotenv_file = find_dotenv() 39 | set_key(dotenv_file, "pinboard_last_pull", last_pull) 40 | 41 | # bundle new notes to unique folder 42 | os.mkdir("out/pinboard/"+last_pull) 43 | 44 | for pb_note in out: 45 | 46 | href = pb_note['href'] 47 | title = pb_note['description'] 48 | filename = title.translate(str.maketrans('', '', string.punctuation)).lower() 49 | filename = filename.replace(" ", "-") 50 | note = pb_note['extended'] 51 | date = pb_note['time'][:10] 52 | tags = pb_note['tags'] 53 | 54 | if pb_note['toread'] == 'yes': 55 | tags += ' to-read' 56 | tags = " ".join(["#"+i for i in tags.split(" ")]) 57 | 58 | with open("out/pinboard/"+last_pull+"/"+filename+'.md','w') as file: 59 | title_line= "# "+title+"\n\n" 60 | tag_line = "tags: "+ tags +"\n\n" 61 | uri_line = "uri: ["+title+"]("+href+")\n" 62 | date_line = "date: "+ date+"\n" 63 | if len(note)>0: 64 | note_line = "### note:\n"+note 65 | else: 66 | note_line = "" 67 | 68 | file.writelines([title_line, 69 | date_line, 70 | tag_line, 71 | uri_line, 72 | note_line]) 73 | 74 | 75 | --------------------------------------------------------------------------------