├── output └── .gitignore ├── .gitignore ├── README.rst ├── kindle.py └── kindle_python3.py /output/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.txt 3 | clips.json 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Kindle Clippings 2 | ================ 3 | 4 | A simple python script to extract clippings from 'My Clippings.txt', organize, store and output them in a more elegant way. 5 | 6 | Features 7 | -------- 8 | 9 | Clippings are stored in a python dict with this structure 10 | 11 | .. code:: py 12 | 13 | clips = {'book': {'position': 'clipping'}} 14 | 15 | Msgpack was used to serialize clippings for archive. 16 | 17 | Each new `My Clippings.txt` will add clips to the previous archive automatically. 18 | 19 | Clips will be exported to the `output` directory; find them there. 20 | 21 | It's EASY and you don't need to care about anything! 22 | 23 | 24 | Usage 25 | ----- 26 | 27 | Install `msgpack-python` first. 28 | 29 | .. code:: bash 30 | 31 | $ pip install msgpack-python 32 | 33 | Clone the project and put `My\ Clippings.txt` in the project's root. 34 | 35 | Run `kindle.py` (or `kindle_python3.py` under Python 3) 36 | 37 | .. code:: bash 38 | 39 | $ python kindle_python3.py 40 | 41 | DONE! 42 | 43 | 44 | Demo 45 | ---- 46 | 47 | Example output files tree: 48 | 49 | .. code:: bash 50 | 51 | $ tree . 52 | . 53 | ├── My Clippings.txt 54 | ├── README.rst 55 | ├── clips.msgpack 56 | ├── kindle.py 57 | └── output 58 | ├── Hackers & Painters (Paul Graham).txt 59 | ├── Life of Pi (Yann Martel).txt 60 | 61 | Example output file content: 62 | 63 | Nerds aren't losers. 
They're just playing a different game, and a game much closer to the one played in the real world. Adults know this. It's hard to find successful adults now who don't claim to have been nerds in high school. 64 | 65 | --- 66 | 67 | What hackers and painters have in common is that they're both makers. Along with composers, architects, and writers, what hackers and painters are trying to do is make good things. They're not doing research per se, though if in the course of trying to make good things they discover some new technique, so much the better. 68 | 69 | --- 70 | 71 | This is not a problem for big companies, because they don't win by making great products. Big companies win by sucking less than other big companies. 72 | 73 | -------------------------------------------------------------------------------- /kindle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import collections 5 | import json 6 | import os 7 | import re 8 | 9 | BOUNDARY = u"==========\r\n" 10 | DATA_FILE = u"clips.json" 11 | OUTPUT_DIR = u"output" 12 | 13 | 14 | def get_sections(filename): 15 | with open(filename, 'rb') as f: 16 | content = f.read().decode('utf-8') 17 | content = content.replace(u'\ufeff', u'') 18 | return content.split(BOUNDARY) 19 | 20 | 21 | def get_clip(section): 22 | clip = {} 23 | 24 | lines = [l for l in section.split(u'\r\n') if l] 25 | if len(lines) != 3: 26 | return 27 | 28 | clip['book'] = lines[0] 29 | match = re.search(r'(\d+)-\d+', lines[1]) 30 | if not match: 31 | return 32 | position = match.group(1) 33 | 34 | clip['position'] = int(position) 35 | clip['content'] = lines[2] 36 | 37 | return clip 38 | 39 | 40 | def export_txt(clips): 41 | """ 42 | Export each book's clips to single text. 
43 | """ 44 | for book in clips: 45 | lines = [] 46 | for pos in sorted(clips[book]): 47 | lines.append(clips[book][pos].encode('utf-8')) 48 | 49 | filename = os.path.join(OUTPUT_DIR, u"%s.md" % book) 50 | with open(filename, 'wb') as f: 51 | f.write("\n\n---\n\n".join(lines)) 52 | 53 | 54 | def load_clips(): 55 | """ 56 | Load previous clips from DATA_FILE 57 | """ 58 | try: 59 | with open(DATA_FILE, 'rb') as f: 60 | return json.load(f) 61 | except (IOError, ValueError): 62 | return {} 63 | 64 | 65 | def save_clips(clips): 66 | """ 67 | Save new clips to DATA_FILE 68 | """ 69 | with open(DATA_FILE, 'wb') as f: 70 | json.dump(clips, f) 71 | 72 | 73 | def main(): 74 | # load old clips 75 | clips = collections.defaultdict(dict) 76 | clips.update(load_clips()) 77 | 78 | # extract clips 79 | sections = get_sections(u'My Clippings.txt') 80 | for section in sections: 81 | clip = get_clip(section) 82 | if clip: 83 | clips[clip['book']][str(clip['position'])] = clip['content'] 84 | 85 | # remove key with empty value 86 | clips = {k: v for k, v in clips.items() if v} 87 | 88 | # save/export clips 89 | save_clips(clips) 90 | export_txt(clips) 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /kindle_python3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import collections 5 | import json 6 | import os 7 | import re 8 | 9 | BOUNDARY = u"==========\r\n" 10 | DATA_FILE = u"clips.json" 11 | OUTPUT_DIR = u"output" 12 | 13 | 14 | def get_sections(filename): 15 | with open(filename, 'rb') as f: 16 | content = f.read().decode('utf-8') 17 | content = content.replace(u'\ufeff', u'') 18 | return content.split(BOUNDARY) 19 | 20 | 21 | def get_clip(section): 22 | clip = {} 23 | 24 | lines = [l for l in section.split(u'\r\n') if l] 25 | if len(lines) != 3: 26 | return 27 | 28 | clip['book'] = lines[0] 
29 | match = re.search(r'(\d+)-\d+', lines[1]) 30 | if not match: 31 | return 32 | position = match.group(1) 33 | 34 | clip['position'] = int(position) 35 | clip['content'] = lines[2] 36 | 37 | return clip 38 | 39 | 40 | def export_txt(clips): 41 | """ 42 | Export each book's clips to single text. 43 | """ 44 | for book in clips: 45 | lines = [] 46 | for pos in sorted(clips[book]): 47 | lines.append(clips[book][pos].encode('utf-8')) 48 | 49 | filename = os.path.join(OUTPUT_DIR, u"%s.md" % book) 50 | # with open(filename, 'wb') as f: 51 | with open(filename, 'wb') as f: 52 | f.write(b"\n\n---\n\n".join(lines)) 53 | 54 | 55 | def load_clips(): 56 | """ 57 | Load previous clips from DATA_FILE 58 | """ 59 | try: 60 | with open(DATA_FILE, 'rb') as f: 61 | return json.load(f) 62 | except (IOError, ValueError): 63 | return {} 64 | 65 | 66 | def save_clips(clips): 67 | """ 68 | Save new clips to DATA_FILE 69 | """ 70 | # with open(DATA_FILE, 'wb') as f: 71 | # with open(DATA_FILE, 'wb') as f: 72 | with open(DATA_FILE, 'w') as f: 73 | json.dump(clips, f) 74 | 75 | 76 | def main(): 77 | # load old clips 78 | clips = collections.defaultdict(dict) 79 | clips.update(load_clips()) 80 | 81 | # extract clips 82 | sections = get_sections(u'My Clippings.txt') 83 | for section in sections: 84 | clip = get_clip(section) 85 | if clip: 86 | clips[clip['book']][str(clip['position'])] = clip['content'] 87 | 88 | # remove key with empty value 89 | clips = {k: v for k, v in clips.items() if v} 90 | 91 | # save/export clips 92 | save_clips(clips) 93 | export_txt(clips) 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | --------------------------------------------------------------------------------