├── output
    └── .gitignore
├── .gitignore
├── README.rst
├── kindle.py
└── kindle_python3.py


/output/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.txt
3 | clips.json
4 | .DS_Store
5 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | Kindle Clippings
 2 | ================
 3 | 
 4 | A simple Python script that extracts clippings from 'My Clippings.txt', then organizes, stores and outputs them in a more elegant way.
 5 | 
 6 | Features
 7 | --------
 8 | 
 9 | Clippings are stored in a python dict with this structure
10 | 
11 | .. code:: py
12 | 
13 |     clips = {'book': {'position': 'clipping'}}
14 | 
15 | JSON is used to serialize clippings for the archive (`clips.json`).
16 | 
17 | Clips from each new `My Clippings.txt` are added to the previous archive automatically.
18 | 
19 | Clips are exported to the `output` directory; find them there.
20 | 
21 | It's EASY and you don't need to worry about anything!
22 | 
23 | 
24 | Usage
25 | -----
26 | 
27 | Install `msgpack-python` first.
28 | 
29 | .. code:: bash
30 | 
31 |     $ pip install msgpack-python
32 | 
33 | Clone the project and put `My\ Clippings.txt` in the project's root.
34 | 
35 | Run `kindle.py` (or `kindle_python3.py` on Python 3)
36 | 
37 | .. code:: bash
38 | 
39 |     $ python kindle_python3.py
40 | 
41 | DONE!
42 | 
43 | 
44 | Demo
45 | ----
46 | 
47 | Example output files tree:
48 | 
49 | .. code:: bash
50 | 
51 |     $ tree .
52 |     .
53 |     ├── My Clippings.txt
54 |     ├── README.rst
55 |     ├── clips.msgpack
56 |     ├── kindle.py
57 |     └── output
58 |         ├── Hackers & Painters (Paul Graham).txt
59 |         ├── Life of Pi (Yann Martel).txt
60 | 
61 | Example output file content:
62 | 
63 |     Nerds aren't losers. They're just playing a different game, and a game much closer to the one played in the real world. Adults know this. It's hard to find successful adults now who don't claim to have been nerds in high school.
64 | 
65 |     ---
66 | 
67 |     What hackers and painters have in common is that they're both makers. Along with composers, architects, and writers, what hackers and painters are trying to do is make good things. They're not doing research per se, though if in the course of trying to make good things they discover some new technique, so much the better.
68 | 
69 |     ---
70 | 
71 |     This is not a problem for big companies, because they don't win by making great products. Big companies win by sucking less than other big companies.
72 | 
73 | 


--------------------------------------------------------------------------------
/kindle.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import collections
 5 | import json
 6 | import os
 7 | import re
 8 | 
# Delimiter on which get_sections splits the clippings file into sections.
BOUNDARY = u"==========\r\n"
# JSON archive of previously extracted clips (read by load_clips, written by save_clips).
DATA_FILE = u"clips.json"
# Directory into which export_txt writes one file per book.
OUTPUT_DIR = u"output"
12 | 
13 | 
def get_sections(filename):
    """
    Read a clippings file and return its raw sections.

    The file is read as bytes and decoded as UTF-8, every U+FEFF (byte
    order mark) character is removed, and the text is split on BOUNDARY.
    """
    with open(filename, 'rb') as source:
        raw = source.read()
    text = raw.decode('utf-8').replace(u'\ufeff', u'')
    return text.split(BOUNDARY)
19 | 
20 | 
def get_clip(section):
    """
    Parse one raw section into a clip dict.

    Returns ``{'book': ..., 'position': ..., 'content': ...}`` when the
    section has exactly three non-empty CRLF-separated lines and the second
    one contains a ``<digits>-<digits>`` range; the position is the first
    number of that range.  Returns None for anything else.
    """
    parts = [part for part in section.split(u'\r\n') if part]
    if len(parts) != 3:
        return None

    title, meta, text = parts
    found = re.search(r'(\d+)-\d+', meta)
    if found is None:
        return None

    return {
        'book': title,
        'position': int(found.group(1)),
        'content': text,
    }
38 | 
39 | 
def export_txt(clips):
    """
    Export each book's clips to a single text file in OUTPUT_DIR.

    ``clips`` maps a book title to a dict of ``{position: content}`` whose
    positions are decimal strings (``main`` stores ``str(position)``).  One
    ``<book>.md`` file is written per book, UTF-8 encoded, with the clips
    ordered by numeric position and separated by ``---`` lines.
    """
    for book in clips:
        book_clips = clips[book]
        # Sort numerically: a plain sort of the string keys would put "10"
        # before "9" and scramble the reading order.
        lines = [book_clips[pos].encode('utf-8')
                 for pos in sorted(book_clips, key=int)]

        # A title may contain a path separator (e.g. "Either/Or"); replace
        # it so the file always lands directly inside OUTPUT_DIR.
        safe_name = book
        for sep in (u'/', os.sep, os.altsep):
            if sep:  # os.altsep is None on POSIX
                safe_name = safe_name.replace(sep, u'_')

        filename = os.path.join(OUTPUT_DIR, u"%s.md" % safe_name)
        with open(filename, 'wb') as f:
            # Bytes separator so the join works on Python 2 and Python 3.
            f.write(b"\n\n---\n\n".join(lines))
52 | 
53 | 
def load_clips():
    """
    Return the clips previously saved in DATA_FILE.

    An empty dict is returned when the file cannot be read or does not
    contain valid JSON.
    """
    try:
        with open(DATA_FILE, 'rb') as source:
            payload = source.read()
        return json.loads(payload)
    except (IOError, ValueError):
        return {}
63 | 
64 | 
def save_clips(clips):
    """
    Save clips to DATA_FILE as JSON, replacing the previous archive.

    The data is serialized before the file is opened, so a serialization
    error (e.g. a value JSON cannot represent) leaves the existing archive
    untouched instead of truncating it.
    """
    payload = json.dumps(clips)
    # Text mode: json produces str, which a binary-mode file rejects on
    # Python 3.  The default ensure_ascii output is pure ASCII, so the
    # bytes written are the same on Python 2.
    with open(DATA_FILE, 'w') as f:
        f.write(payload)
71 | 
72 | 
def main():
    """
    Merge the clips found in 'My Clippings.txt' into the saved archive,
    then save the archive and export one text file per book.
    """
    # Start from the previously saved clips.
    archive = collections.defaultdict(dict)
    archive.update(load_clips())

    # Add every section that parses as a clip, keyed by book and position.
    for raw_section in get_sections(u'My Clippings.txt'):
        parsed = get_clip(raw_section)
        if not parsed:
            continue
        archive[parsed['book']][str(parsed['position'])] = parsed['content']

    # Drop books that ended up without any clips.
    kept = {title: entries for title, entries in archive.items() if entries}

    save_clips(kept)
    export_txt(kept)
91 | 
92 | 
# Run the extraction only when this file is executed as a script.
if __name__ == '__main__':
    main()
95 | 


--------------------------------------------------------------------------------
/kindle_python3.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import collections
 5 | import json
 6 | import os
 7 | import re
 8 | 
# Delimiter on which get_sections splits the clippings file into sections.
BOUNDARY = u"==========\r\n"
# JSON archive of previously extracted clips (read by load_clips, written by save_clips).
DATA_FILE = u"clips.json"
# Directory into which export_txt writes one file per book.
OUTPUT_DIR = u"output"
12 | 
13 | 
def get_sections(filename):
    """
    Return the raw sections of the clippings file ``filename``.

    The bytes are decoded as UTF-8, all U+FEFF (byte order mark) characters
    are stripped out, and the resulting text is split on BOUNDARY.
    """
    with open(filename, 'rb') as handle:
        data = handle.read()
    decoded = data.decode('utf-8')
    cleaned = decoded.replace(u'\ufeff', u'')
    return cleaned.split(BOUNDARY)
19 | 
20 | 
def get_clip(section):
    """
    Turn one raw section into a clip dict, or return None.

    A clip is produced only when the section consists of exactly three
    non-empty CRLF-separated lines and the second line contains a
    ``<digits>-<digits>`` range.  The result has the keys ``'book'`` (first
    line), ``'position'`` (first number of the range, as an int) and
    ``'content'`` (third line).
    """
    non_empty = [piece for piece in section.split(u'\r\n') if piece]
    if len(non_empty) != 3:
        return None

    book, info, content = non_empty
    location = re.search(r'(\d+)-\d+', info)
    if location is None:
        return None

    return {
        'book': book,
        'position': int(location.group(1)),
        'content': content,
    }
38 | 
39 | 
def export_txt(clips):
    """
    Export each book's clips to a single text file in OUTPUT_DIR.

    ``clips`` maps a book title to a dict of ``{position: content}`` whose
    positions are decimal strings (``main`` stores ``str(position)``).  One
    ``<book>.md`` file is written per book, UTF-8 encoded, with the clips
    ordered by numeric position and separated by ``---`` lines.
    """
    for book in clips:
        book_clips = clips[book]
        # Sort numerically: a plain sort of the string keys would put "10"
        # before "9" and scramble the reading order.
        lines = [book_clips[pos].encode('utf-8')
                 for pos in sorted(book_clips, key=int)]

        # A title may contain a path separator (e.g. "Either/Or"); replace
        # it so the file always lands directly inside OUTPUT_DIR.
        safe_name = book
        for sep in (u'/', os.sep, os.altsep):
            if sep:  # os.altsep is None on POSIX
                safe_name = safe_name.replace(sep, u'_')

        filename = os.path.join(OUTPUT_DIR, u"%s.md" % safe_name)
        with open(filename, 'wb') as f:
            f.write(b"\n\n---\n\n".join(lines))
53 | 
54 | 
def load_clips():
    """
    Load the clips archived in DATA_FILE by a previous run.

    Returns an empty dict if the file cannot be read or its content is not
    valid JSON.
    """
    try:
        with open(DATA_FILE, 'rb') as archive:
            raw = archive.read()
        return json.loads(raw)
    except (IOError, ValueError):
        return {}
64 | 
65 | 
def save_clips(clips):
    """
    Save clips to DATA_FILE as JSON, replacing the previous archive.

    The data is serialized before the file is opened, so a serialization
    error (e.g. a value JSON cannot represent) leaves the existing archive
    untouched instead of truncating it.
    """
    payload = json.dumps(clips)
    # Text mode: json produces str, which a binary-mode file would reject.
    with open(DATA_FILE, 'w') as f:
        f.write(payload)
74 | 
75 | 
def main():
    """
    Merge the clips found in 'My Clippings.txt' into the saved archive,
    then save the archive and export one text file per book.
    """
    # Previously saved clips form the starting point.
    merged = collections.defaultdict(dict)
    merged.update(load_clips())

    # Parse each section; sections that are not clips yield nothing.
    for chunk in get_sections(u'My Clippings.txt'):
        entry = get_clip(chunk)
        if not entry:
            continue
        merged[entry['book']][str(entry['position'])] = entry['content']

    # Keep only the books that actually have clips.
    result = {book: found for book, found in merged.items() if found}

    save_clips(result)
    export_txt(result)
94 | 
95 | 
# Run the extraction only when this file is executed as a script.
if __name__ == '__main__':
    main()
98 | 


--------------------------------------------------------------------------------