#!/usr/bin/env python3
"""Convert Kindle highlights saved as HTML into tab-separated rows for Anki.

Reads ``Highlights.html`` from the current directory and writes one line per
highlight to standard output.

Usage::

    mv "Foo Highlights.html" Highlights.html
    python3 kindle-html-to-anki.py > anki-import.txt

Fields, in order: book, clipping text, location in the book, date.
"""
from re import sub


def extract_books(soup):
    """Extract books from the given soup.

    Every ``<div class="bookMain">`` contributes one entry. The ASIN is the
    part of the div's ``id`` before the first underscore.

    Returns a dict mapping ASIN to ``{'asin', 'title', 'author'}``.
    """
    books = {}
    for d in soup.find_all('div', 'bookMain'):
        asin = sub(r'_.*$', '', d['id'])
        title = d.find('span', 'title').text.strip()
        author = d.find('span', 'author').text.strip()
        # Strip only the leading "by" label. An unanchored pattern would also
        # eat "by " inside the name itself ("Abby Smith" -> "AbSmith").
        author = sub(r'^by\s+', '', author)
        author = author.replace('\n', '')

        books[asin] = dict(asin=asin, title=title, author=author)

    return books


def extract_highlights(soup, books):
    """Extract highlights from a soup.

    Every ``<div class="yourHighlight">`` that has ``highlight``, ``asin`` and
    ``end_location`` spans yields one clipping; divs missing any of the three
    are skipped. ``books`` is the mapping returned by ``extract_books``.

    Returns a list of dicts with the keys ``book``, ``clipping_text``,
    ``location`` and ``date`` (``date`` is always the empty string).
    """
    clippings = []
    for d in soup.find_all('div', 'yourHighlight'):
        highlight = d.find('span', 'highlight')
        asin_tag = d.find('span', 'asin')
        loc_tag = d.find('span', 'end_location')
        # Explicit checks instead of a blanket ``except AttributeError`` so
        # that unrelated programming errors are not silently swallowed.
        if highlight is None or asin_tag is None or loc_tag is None:
            continue

        asin = asin_tag.text
        book = books.get(asin)
        if book is None:
            # Unknown book: keep the highlight and label it with the bare
            # ASIN rather than aborting the whole export with a KeyError.
            label = asin
        else:
            label = '{} ({})'.format(book['title'], book['author'])

        clippings.append({
            'book': label,
            'clipping_text': highlight.text.replace('\n', ''),
            'location': loc_tag.text,
            'date': '',
        })
    return clippings


def _format_row(clipping):
    """Return *clipping* as a single tab-separated line (no newline)."""
    fields = (clipping['book'],
              clipping['clipping_text'],
              clipping['location'],
              clipping['date'])
    # A literal tab inside a field would be read as a column separator.
    return "\t".join(field.replace('\t', ' ') for field in fields)


def main():
    """Read ``Highlights.html`` and print one tab-separated row per highlight."""
    # Imported here so the extraction helpers above stay importable on
    # systems where bs4 is not installed.
    from bs4 import BeautifulSoup

    # Binary mode lets BeautifulSoup detect the document's encoding itself
    # instead of relying on the platform's default text encoding. Naming the
    # parser keeps results identical regardless of which parsers are installed.
    with open("Highlights.html", "rb") as f:
        soup = BeautifulSoup(f, "html.parser")

    books = extract_books(soup)
    for clipping in extract_highlights(soup, books):
        print(_format_row(clipping))


if __name__ == '__main__':
    main()

# vim:set fileencoding=utf-8: