├── .gitignore
├── README.md
├── get_hypothesis_notes.py
└── get_pinboard_notes.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | .env
 3 | annotations.json
 4 | getnotes.sh
 5 | secrets.py
 6 | src/
 7 | 
 8 | *.DS_Store
 9 | 
10 | # Consider removing already checked-in checkpoints?
11 | .ipynb_checkpoints
12 | 
13 | #active-learning-changing-environments/etc/
14 | 
15 | # remove .pyc files
16 | *.pyc
17 | 
18 | # remove .p pickle files
19 | *.p
20 | 
21 | # remove np.save files
22 | *.npy
23 | 
24 | # Items below taken from github's tex gitignore
25 | 
26 | ## Core latex/pdflatex auxiliary files:
27 | *.aux
28 | *.lof
29 | *.log
30 | *.lot
31 | *.fls
32 | *.out
33 | *.toc
34 | *.fmt
35 | *.fot
36 | *.cb
37 | *.cb2
38 | 
39 | ## Intermediate documents:
40 | *.dvi
41 | *.nav
42 | *.snm
43 | *-converted-to.*
44 | # these rules might exclude image files for figures etc.
45 | 
46 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
47 | *.bbl
48 | *.bcf
49 | *.blg
50 | *-blx.aux
51 | *-blx.bib
52 | *.brf
53 | *.run.xml
54 | 
55 | ## Build tool auxiliary files:
56 | *.fdb_latexmk
57 | *.synctex
58 | *.synctex(busy)
59 | *.synctex.gz
60 | *.synctex.gz(busy)
61 | *.pdfsync
62 | 
63 | # endfloat
64 | *.ttt
65 | *.fff
66 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Annotation tools
 2 | 
 3 | ## exporting hypothesis annotations to obsidian (markdown files)
 4 | 
 5 | Connecting [hypothesis](https://hypothes.is) and [obsidian.md](https://obsidian.md). Inspired by the tutorial from [Shawn Graham](https://electricarchaeology.ca/2021/02/14/from-hypothesis-annotation-to-obsidian-note/).
 6 | 
 7 | First, get ‘Hypexport’ from https://github.com/karlicoss/hypexport . 
 8 | 
 9 | Install it with
10 | 
11 | ``` pip install --user git+https://github.com/karlicoss/hypexport ```
12 | 
13 | Then, create a new text file; call it secrets.py and put into it your Hypothesis username and your developer token:
14 | 
15 | ```
16 | username = "USERNAME"
17 | token = "TOKEN"
18 | ```
19 | 
20 | ### Getting annotations
21 | Grab all of your annotations with:
22 | 
23 | ``` python -m hypexport.export --secrets secrets.py > annotations.json ```
24 | 
25 | ### Creating markdown files
26 | 
27 | create an out directory for the markdown notes. 
28 | 
29 | ```mkdir out```
30 | 
31 | create a ```.env``` file and add the date you want to pull annotations from. 
32 | This environment variable will be updated every time the script is run.
33 | 
34 | ```hypothesis_last_pull="2021-02-25" ```
35 | 
36 | 
37 | Then run ```python get_hypothesis_notes.py``` script.
38 | 
39 | ---
40 | 
41 | ## Exporting pinboard notes
42 | 
43 | add pinboard API key to ```secrets.py``` file
44 | 
45 | ``` pinboard_key = 'username:key-number' ```
46 | 
47 | 
48 | add last pull variable to the ```.env``` file, e.g.:
49 | 
50 | ```pinboard_last_pull="2021-02-25" ```
51 | 
52 | then run:
53 | 
54 | ``` python get_pinboard_notes.py ```
55 | 
56 | ----
57 | to do:
58 | 
59 | - add #to-read tag to "read later" pinboard bookmarks
60 | - think about Maps of Content 
61 | - tag hierarchies 
62 | - tag disambiguation / suggestions / + bundles
63 | - think about [[links]] between pages (beyond tags)
64 | - look into images
65 | 


--------------------------------------------------------------------------------
/get_hypothesis_notes.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from dotenv import load_dotenv, find_dotenv, set_key
  3 | load_dotenv() 
  4 | 
  5 | import json
  6 | import string
  7 | 
  8 | import numpy as np
  9 | import pandas as pd
 10 | 
 11 | from dateutil.parser import parse
 12 | 
 13 | ROOT = os.path.dirname(os.path.abspath(__file__))
 14 | 
 15 | with open('annotations.json', 'r', encoding='utf-8') as j:
 16 |      contents = json.loads(j.read())
 17 | 
 18 | 
 19 | # pull last import date from .env file
 20 | hypothesis_last_pull = os.getenv('hypothesis_last_pull')
 21 | 
 22 | print("hypothesis - Last date pulled: ", hypothesis_last_pull)
 23 | 
 24 | # if None, set to 1990 
 25 | if hypothesis_last_pull is None:
 26 |     hypothesis_last_pull = "1990-01-01"
 27 | 
 28 | # filter annotations based on last pulled date
 29 | contents['annotations'] = [i for i in contents['annotations'] if parse(i['updated'][:10])>=parse(hypothesis_last_pull)]
 30 | 
 31 | print("new notes: ", len(contents['annotations']))
 32 | 
 33 | 
 34 | all_notes =[]
 35 | 
 36 | # extract annotation info from each hypothesis document
 37 | for i in range(len(contents['annotations'])):
 38 | 
 39 |     anno = contents['annotations'][i]
 40 | 
 41 |     created = anno['updated']
 42 |     if len(anno['document'])==0:
 43 |         title = created[:10]+"-"+"no-title"
 44 |     else:
 45 |         title = anno['document']['title'][0]
 46 |         title = title.translate(str.maketrans('', '', string.punctuation)).lower()
 47 |         title = (created[:10]+"-"+title).replace(" ", "-")
 48 | 
 49 |     context_href = anno['links']['incontext']
 50 |     uri = anno['uri']
 51 |     tags = anno['tags']
 52 |     try:
 53 |         highlights = [i['exact'] for i in anno['target'][0]['selector'] if 'exact' in i.keys()][0]
 54 |     except:
 55 |         print(i)
 56 |         print(anno['target'])
 57 |     
 58 |     note = anno['text']
 59 |     
 60 |     n ={}
 61 |     
 62 |     date = created[:10]
 63 |     tags = "#"+' #'.join(tags)
 64 |     url = context_href
 65 |     title = title
 66 |     
 67 |     n['title'] = title
 68 |     n['tags'] = tags
 69 |     n['date'] = date
 70 |     n['url'] = url
 71 |     n['highlights'] = "> " +highlights +" s\n" +note 
 72 |     n['uri'] = uri
 73 |     
 74 |     all_notes.append(n)
 75 | 
 76 | # create dataframe and unify notes with same title / date pairs
 77 | df = pd.DataFrame(all_notes).groupby(["title","date","uri"])['highlights'].apply(list).reset_index(name='highlights')
 78 | df['tags'] = pd.DataFrame(all_notes).groupby(["title","date"])['tags'].apply(lambda x: list(np.unique(x))).reset_index(name='tags')['tags'].values
 79 | df['url'] = pd.DataFrame(all_notes).groupby(["title","date"])['url'].apply(lambda x: list(np.unique(x))).reset_index(name='url')['url'].values
 80 | 
 81 | # update last pull in .env file
 82 | last_pull = max(df['date'])
 83 | print(last_pull)
 84 | 
 85 | # bundle new notes to unique folder
 86 | base_path = os.path.join(ROOT, "out", "hypothesis", last_pull)
 87 | os.makedirs(base_path, exist_ok=True)
 88 | 
 89 | # create markdown files for each document with highlights and notes
 90 | for i,note_file in df.iterrows():
 91 | 
 92 |     title = note_file['title']
 93 |     tags = note_file['tags']
 94 |     date = note_file['date']
 95 |     uri = note_file['uri']
 96 | 
 97 |     with open(os.path.join(base_path, title+'.md'), 'w', encoding='utf-8') as out:
 98 |         title_line= "# "+title[11:]+"\n\n"
 99 |         tag_line = "tags: "+ " ".join([i for i in " ".join(tags).split(" ") if len(i)>1])+"\n"
100 |         uri_line = "uri: ["+title[11:]+"]("+uri+")\n"
101 |         date_line = "date: "+ date+"\n"
102 | 
103 |         high_line = ""
104 |         for index,high in enumerate(note_file['highlights']):
105 |             high_line += high +"\n"
106 |             high_line += "[hypothesis ref]("+note_file['url'][0]+")\n\n ----\n"
107 | 
108 |         out.writelines([title_line,
109 |                         tag_line,
110 |                         uri_line,
111 |                         date_line,
112 |                         "### highlight:\n",high_line])
113 |         
114 | dotenv_file = find_dotenv()
115 | set_key(dotenv_file, "hypothesis_last_pull", last_pull)
116 | 
117 | 


--------------------------------------------------------------------------------
/get_pinboard_notes.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from dotenv import load_dotenv, find_dotenv, set_key
 3 | load_dotenv() 
 4 | 
 5 | from dateutil.parser import parse
 6 | 
 7 | import pandas as pd
 8 | import json
 9 | import string
10 | 
11 | import pinboard
12 | 
13 | import secrets as se
14 | 
15 | pk = se.pinboard_key
16 | pb = pinboard.Pinboard(pk)
17 | 
18 | out = pb.posts.all(parse_response=False)
19 | out = json.loads(out.read().decode("utf-8"))
20 | 
21 | # pull last import date from .env file
22 | pinboard_last_pull = os.getenv('pinboard_last_pull')
23 | print("pinboard last date pulled: ", pinboard_last_pull)
24 | 
25 | # if None, set to 1990 
26 | if pinboard_last_pull is None:
27 |     pinboard_last_pull = "1990-01-01"
28 | 
29 | # filter bookmarks based on last pulled date
30 | out = [i for i in out if parse(i['time'][:10])>=parse(pinboard_last_pull)]
31 | print ("new notes: ", len(out))
32 | 
33 | assert len(out)>0
34 | 
35 | # update last pull in .env file
36 | last_pull = pd.DataFrame(out).time.max()[:10]
37 | 
38 | dotenv_file = find_dotenv()
39 | set_key(dotenv_file, "pinboard_last_pull", last_pull)
40 | 
41 | # bundle new notes to unique folder
42 | os.mkdir("out/pinboard/"+last_pull)
43 | 
44 | for pb_note in out:
45 | 
46 |     href = pb_note['href']
47 |     title = pb_note['description']
48 |     filename = title.translate(str.maketrans('', '', string.punctuation)).lower()
49 |     filename = filename.replace(" ", "-")
50 |     note = pb_note['extended']
51 |     date = pb_note['time'][:10]
52 |     tags = pb_note['tags']
53 | 
54 |     if pb_note['toread'] == 'yes':
55 |         tags += ' to-read' 
56 |     tags = " ".join(["#"+i for i in tags.split(" ")]) 
57 | 
58 |     with open("out/pinboard/"+last_pull+"/"+filename+'.md','w') as file:
59 |             title_line= "# "+title+"\n\n"
60 |             tag_line = "tags: "+ tags +"\n\n"
61 |             uri_line = "uri: ["+title+"]("+href+")\n"
62 |             date_line = "date: "+ date+"\n"
63 |             if len(note)>0:
64 |                 note_line = "### note:\n"+note
65 |             else:
66 |                 note_line = ""
67 | 
68 |             file.writelines([title_line,
69 |                              date_line,
70 |                             tag_line,
71 |                             uri_line,
72 |                             note_line])
73 |         
74 | 
75 | 


--------------------------------------------------------------------------------