├── README.md
├── kindle-html-to-anki.py
└── kindle-to-anki.js


/README.md:
--------------------------------------------------------------------------------
 1 | Kindle → Anki
 2 | =============
 3 | 
 4 | My Clippings.txt from your Kindle
 5 | ---------------------------------
 6 | 
 7 | Convert a Kindle "My Clippings.txt" file to a tab-separated values file. Anki
 8 | can import this file.
 9 | 
10 | It's work in progress. Don't use it.
11 | 
12 |      node kindle-to-anki.js My\ Clippings.txt
13 | 
14 | Highlights in HTML
15 | ------------------
16 | 
17 | For highlights downloaded from Amazon's ["Your Highlights"](https://kindle.amazon.com/your_highlights)
18 | page, use `kindle-html-to-anki.py`.
19 | 
20 |      mv Foo\ Highlights.html Highlights.html
21 |      python3 kindle-html-to-anki.py > anki-import.txt
22 | 
23 | Order of the fields exported
24 | ----------------------------
25 | - Book
26 | - Clipping text
27 | - Location in the book
28 | - Date
29 | 


--------------------------------------------------------------------------------
/kindle-html-to-anki.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | from bs4 import BeautifulSoup
 3 | from re import sub
 4 | 
 5 | 
 6 | def extract_books(soup):
 7 |     """Extract books from the given soup."""
 8 |     books = {}
 9 |     for d in soup.find_all('div', 'bookMain'):
10 |         asin = sub(r'_.*$', '', d['id'])
11 |         title = d.find('span', 'title').text.strip()
12 |         author = sub(r'by ', '', d.find('span', 'author').text.strip())
13 |         author = sub('\n', '', author)
14 | 
15 |         books[asin] = dict(asin=asin, title=title, author=author)
16 | 
17 |     return books
18 | 
19 | 
def extract_highlights(soup, books):
    """Extract highlights from the parsed highlights page.

    Every ``div`` with class ``yourHighlight`` is expected to contain the
    spans ``highlight`` (the text), ``asin`` (the book it belongs to) and
    ``end_location``. Entries that lack one of these spans, or that refer to
    an ASIN not present in ``books``, are skipped so that a single malformed
    entry does not abort the whole export.

    Args:
        soup: Parsed document offering ``find_all``.
        books: Mapping of ASIN to book dict as returned by ``extract_books``.

    Returns:
        list of dicts with the keys ``book``, ``clipping_text``, ``location``
        and ``date``. ``date`` is always the empty string because this
        function does not read a date from the page.
    """
    clippings = []
    for d in soup.find_all('div', 'yourHighlight'):
        highlight = d.find('span', 'highlight')
        asin_span = d.find('span', 'asin')
        loc_span = d.find('span', 'end_location')
        # Explicit checks instead of a blanket "except AttributeError", which
        # would also hide unrelated programming errors.
        if highlight is None or asin_span is None or loc_span is None:
            continue

        book = books.get(asin_span.text)
        if book is None:
            # Highlight for a book that has no "bookMain" entry: treat it like
            # any other incomplete entry rather than raising KeyError.
            continue

        clippings.append({
            'book': '{} ({})'.format(book['title'], book['author']),
            # Line breaks inside a field would break the one-line-per-note
            # output produced by the caller.
            'clipping_text': sub('\n', '', highlight.text),
            'location': loc_span.text,
            'date': '',
        })
    return clippings
40 | 
41 | 
def main():
    """Convert ``Highlights.html`` in the current directory to TSV on stdout.

    Prints one line per highlight with the tab-separated fields book,
    clipping text, location and date.
    """
    # Decode as UTF-8 explicitly instead of relying on the locale default,
    # and name the parser so the result does not depend on which optional
    # parsers happen to be installed (this also avoids bs4's warning about a
    # guessed parser).
    with open("Highlights.html", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")

    # The soup is fully built once the constructor returns, so the file can be
    # closed before the data is extracted.
    books = extract_books(soup)
    for clipping in extract_highlights(soup, books):
        print("\t".join([clipping['book'],
                         clipping['clipping_text'],
                         clipping['location'],
                         clipping['date']]))


if __name__ == '__main__':
    main()
57 | 
58 | # vim:set fileencoding=utf-8:
59 | 


--------------------------------------------------------------------------------
/kindle-to-anki.js:
--------------------------------------------------------------------------------
  1 | "use strict";
  2 | 
  3 | 
  4 | var Exception = function() {}
  5 | Exception.prototype.toString = function() {
  6 |   var name = this.name || 'unknown';
  7 |   var message  = this.message || 'no description';
  8 |   return '[' + name + ']' + message;
  9 | };
 10 | function InvalidInputException(message) {
 11 |   this.name = 'Invalid input';
 12 |   this.message = message;
 13 | };
 14 | InvalidInputException.prototype = new Exception();
 15 | 
 16 | // XXX Document
 17 | var parse_clippings = function(clippings_text) {
 18 | 
 19 |   var clippings = [];
 20 | 
 21 |   // Remove U+FEFF
 22 |   clippings_text = clippings_text.replace(/^\ufeff/, '');
 23 | 
 24 |   // Split up clippings, the last one is empty, so throw it away.
 25 |   var clippings_texts = clippings_text.split("\r\n==========\r\n");
 26 |   clippings_texts = clippings_texts.slice(0, clippings_texts.length-1);
 27 | 
 28 |   // Parse clippings
 29 |   for(var i=0; i<clippings_texts.length; i++) {
 30 |     var clipping_text = clippings_texts[i];
 31 |     var clipping = {};
 32 | 
 33 |     var lines = clipping_text.split("\r\n");
 34 |     if (lines.length < 4) {
 35 |       throw new InvalidInputException("Clipping is too short:\n\n"
 36 |           + clipping_text);
 37 |     }
 38 | 
 39 |     // Parse the lines of a clipping
 40 |     clipping["book"] = lines[0];
 41 |     var meta = lines[1];
 42 |     if (lines[2] !== "")
 43 |       throw new InvalidInputException("Third line should be empty:\n\n"
 44 |           + clipping_text);
 45 |     clipping["text"] = lines.slice(3, lines.length).join("\n");
 46 | 
 47 |     // Actually, parse meta further
 48 |     var metas = meta.split("|");
 49 |     clipping["loc"] = metas.slice(0, metas.length-1).join()
 50 |       .replace(/-?\s*Highlight (on)?\s*/, '')
 51 |       .replace(/\s*$/, '');
 52 |     clipping["date"] = metas[metas.length-1]
 53 |       .replace(/\s*Added on\s*/, '');
 54 | 
 55 |     clippings.push(clipping);
 56 |   }
 57 | 
 58 |   return clippings;
 59 | };
 60 | 
 61 | var TSVify = function(array_of_hashes) {
 62 |   var tsv = "";
 63 | 
 64 |   // Put double quotes around some text (which may contain quotes itself, which
 65 |   // are replaces by other quotes as Anki does not like quoted quotes...)
 66 |   var quotes = function(text) {
 67 |     return '"' + text.replace(/"/g, '“') + '"';
 68 |   }
 69 | 
 70 |   // Check input
 71 |   if (!(array_of_hashes instanceof Array))
 72 |     throw new InvalidInputException("Not an array");
 73 |   if (array_of_hashes.length===0)
 74 |     throw new InvalidInputException("Zero length array");
 75 |   // FIXME
 76 |   // for(var i=0; i<array_of_hashes.length; i++)
 77 |   //  if (!(array_of_hashes[i] instanceof Array))
 78 |   //    throw new InvalidInputException("Not an array of hashes");
 79 | 
 80 |   // First line decides the fields
 81 |   var fields = Object.keys(array_of_hashes[0]);
 82 | 
 83 |   // Build TSV text from data
 84 |   for(var i=0; i<array_of_hashes.length; i++) {
 85 |     tsv += fields.map(function(field) {
 86 |       return quotes(array_of_hashes[i][field]);
 87 |     })
 88 |     .join("\t")
 89 |       + "\n";
 90 |   }
 91 | 
 92 |   return tsv;
 93 | };
 94 | 
 95 | // TSVify(1);
 96 | // TSVify("foo");
 97 | // TSVify(["foo"]);
 98 | // FIXME TSVify([["foo"]]);
 99 | 
100 | var fs = require('fs');
101 | 
// A downstream reader such as `head -1` may close the pipe before all output
// has been written. That surfaces as an EPIPE 'error' event on stdout; treat
// it as a normal end of output instead of an unhandled error.
process.stdout.on('error', function(err) {
  if (err.code === 'EPIPE') {
    process.exit(0);
  }
  throw err;
});

// Every command line argument is the path of a clippings file.
var clippings_files = process.argv.slice(2);

// Read the files one after the other so that the output appears in the same
// order as the arguments, independent of how long each read takes.
clippings_files.forEach(function(clippings_file) {
  var text;
  try {
    text = fs.readFileSync(clippings_file, 'utf-8');
  } catch (err) {
    // Keep the underlying reason (missing file, permissions, ...) visible.
    throw new InvalidInputException('Error opening "' + clippings_file + '": '
        + err.message);
  }

  var clippings = parse_clippings(text);
  var tsv = TSVify(clippings);

  process.stdout.write(tsv);
});
119 | 


--------------------------------------------------------------------------------