├── README.md
├── kindle-html-to-anki.py
└── kindle-to-anki.js
/README.md:
--------------------------------------------------------------------------------
1 | Kindle → Anki
2 | =============
3 |
4 | My Clippings.txt from your Kindle
5 | ---------------------------------
6 |
7 | Convert a Kindle "My Clippings.txt" file to a tab-separated values file. Anki
8 | can import this file.
9 |
10 | It's a work in progress. Don't use it.
11 |
12 | node kindle-to-anki.js My\ Clippings.txt
13 |
14 | Highlights in HTML
15 | ------------------
16 |
17 | For highlights downloaded from Amazon's ["Your Highlights"](https://kindle.amazon.com/your_highlights)
18 | page, use `kindle-html-to-anki.py`.
19 |
20 | mv Foo\ Highlights.html Highlights.html
21 | python3 kindle-html-to-anki.py > anki-import.txt
22 |
23 | Order of the fields exported
24 | ----------------------------
25 | - Book
26 | - Clipping text
27 | - Location in the book
28 | - Date
29 |
--------------------------------------------------------------------------------
/kindle-html-to-anki.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from bs4 import BeautifulSoup
3 | from re import sub
4 |
5 |
def extract_books(soup):
    """Extract books from the given soup.

    Looks at every ``div`` with class ``bookMain`` and reads the book's
    identifier, title and author from it.

    Args:
        soup: Parsed document offering ``find_all(tag, css_class)``; each
            returned element must support ``element['id']`` and
            ``find(tag, css_class)`` returning an object with ``.text``.

    Returns:
        dict mapping each identifier to a dict with the keys ``asin``,
        ``title`` and ``author``.

    Raises:
        AttributeError: if a ``bookMain`` div lacks a ``title`` or
            ``author`` span (``find`` returns ``None``).
    """
    books = {}
    for d in soup.find_all('div', 'bookMain'):
        # The identifier is the div id with everything from the first
        # underscore onwards removed.
        asin = sub(r'_.*$', '', d['id'])
        title = d.find('span', 'title').text.strip()
        # Strip only the leading "by " label. An unanchored pattern would
        # also delete "by " inside the name ("by Abby Smith" -> "AbSmith").
        author = sub(r'^by ', '', d.find('span', 'author').text.strip())
        # Collapse an author string that was wrapped across lines.
        author = sub('\n', '', author)

        books[asin] = dict(asin=asin, title=title, author=author)

    return books
18 |
19 |
def extract_highlights(soup, books):
    """Build one clipping record for each highlight in the soup.

    Every ``div`` with class ``yourHighlight`` is inspected for its
    ``highlight``, ``asin`` and ``end_location`` spans. A div missing any of
    these spans is skipped.

    Args:
        soup: Parsed document offering ``find_all(tag, css_class)``.
        books: Mapping from identifier to a dict holding at least ``title``
            and ``author`` (the shape produced by ``extract_books``).

    Returns:
        list of dicts with the keys ``book``, ``clipping_text``,
        ``location`` and ``date`` (``date`` is always the empty string).

    Raises:
        KeyError: if a highlight names an identifier absent from ``books``.
    """
    def span_text(node, css_class):
        # Raises AttributeError when the span is absent (find() gave None).
        return node.find('span', css_class).text

    found = []
    for node in soup.find_all('div', 'yourHighlight'):
        try:
            # Lookup order matters: an incomplete highlight must be skipped
            # before the book lookup gets a chance to fail.
            text = sub('\n', '', span_text(node, 'highlight'))
            asin = span_text(node, 'asin')
            location = span_text(node, 'end_location')
            owner = books[asin]
            label = '{} ({})'.format(owner['title'], owner['author'])
        except AttributeError:
            # Incomplete highlight markup: leave it out of the result.
            continue
        found.append({
            'book': label,
            'clipping_text': text,
            'location': location,
            'date': '',
        })
    return found
40 |
41 |
def main():
    """Convert ``Highlights.html`` into tab-separated lines on stdout.

    Reads ``Highlights.html`` from the current directory, extracts the books
    and their highlights, and prints one line per highlight with the fields
    book, clipping text, location and date separated by tabs.
    """
    # Open in binary so Beautiful Soup detects the document's encoding itself
    # instead of relying on the platform's default text encoding.
    with open("Highlights.html", "rb") as f:
        # Name the parser explicitly: without it bs4 picks whichever parser
        # is installed, so results can differ between machines.
        soup = BeautifulSoup(f, "html.parser")
    books = extract_books(soup)

    clippings = extract_highlights(soup, books)
    for clipping in clippings:
        print("\t".join([clipping['book'],
                         clipping['clipping_text'],
                         clipping['location'],
                         clipping['date']]))
53 |
54 |
55 | if __name__ == '__main__':
56 | main()
57 |
58 | # vim:set fileencoding=utf-8:
59 |
--------------------------------------------------------------------------------
/kindle-to-anki.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 |
/**
 * Base type for the errors raised by this script.
 *
 * Inherits from Error so that thrown instances satisfy `instanceof Error`.
 * toString() renders as "[name]message", falling back to "unknown" and
 * "no description" when either part is missing.
 */
var Exception = function() {};
Exception.prototype = Object.create(Error.prototype);
Exception.prototype.constructor = Exception;
// Error.prototype contributes the name "Error"; blank it out so an instance
// without its own name still renders as "unknown" in toString().
Exception.prototype.name = '';
Exception.prototype.toString = function() {
    var name = this.name || 'unknown';
    var message = this.message || 'no description';
    return '[' + name + ']' + message;
};

/**
 * Error signalling that the input text could not be handled.
 *
 * @param {string} message - Description of what was wrong with the input.
 */
function InvalidInputException(message) {
    this.name = 'Invalid input';
    this.message = message;
    // Record where the exception was created (V8/Node only), so an uncaught
    // throw points at the offending call.
    if (typeof Error.captureStackTrace === 'function') {
        Error.captureStackTrace(this, InvalidInputException);
    }
}
InvalidInputException.prototype = new Exception();
InvalidInputException.prototype.constructor = InvalidInputException;
15 |
16 | // XXX Document
17 | var parse_clippings = function(clippings_text) {
18 |
19 | var clippings = [];
20 |
21 | // Remove U+FEFF
22 | clippings_text = clippings_text.replace(/^\ufeff/, '');
23 |
24 | // Split up clippings, the last one is empty, so throw it away.
25 | var clippings_texts = clippings_text.split("\r\n==========\r\n");
26 | clippings_texts = clippings_texts.slice(0, clippings_texts.length-1);
27 |
28 | // Parse clippings
29 | for(var i=0; i Unhandled 'error' event
119 |
--------------------------------------------------------------------------------