├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Pipfile ├── Pipfile.lock ├── README.md ├── kindle2notion ├── __init__.py ├── __main__.py ├── exporting.py ├── parsing.py └── reading.py ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── test_data └── Test Clippings.txt ├── test_exporting.py └── test_parsing.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | pytestdebug.log 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | doc/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | #poetry.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | # .env 113 | .env 114 | .venv/ 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | pythonenv* 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | 140 | # pytype static type analyzer 141 | .pytype/ 142 | 143 | # operating system-related files 144 | # file properties cache/storage on macOS 145 | *.DS_Store 146 | # thumbnail cache on Windows 147 | Thumbs.db 148 | 149 | # profiling data 150 | .prof 151 | 152 | 153 | # End of https://www.toptal.com/developers/gitignore/api/python 154 | 155 | 156 | # IDE 157 | .vscode/ 158 | # custom project files 159 | MyClippings.txt 160 | dist/ 161 | images/ 162 | .env.local 163 | My Clipping.txt 164 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jeffrey Jacob 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include requirements-dev.txt 3 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | click = ">=8.0.0" 8 | dateparser = ">=1.0.0" 9 | python-decouple = ">=3.3" 10 | requests = ">=2.25.0" 11 | pathlib = "*" 12 | notion-client = "*" 13 | DateTime = ">=4.3" 14 | kindle2notion = {editable = true, path = "."} 15 | 16 | [dev-packages] 17 | 18 | [requires] 19 | python_version = "3.9" 20 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "16640129d2e9de5bc0a42f71b6d3ab743e312c7ce6bd71406422cc5b6a22087c" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.9" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "anyio": { 20 | "hashes": [ 21 | "sha256:a0aeffe2fb1fdf374a8e4b471444f0f3ac4fb9f5a5b542b48824475e0042a5a6", 22 | "sha256:b5fa16c5ff93fa1046f2eeb5bbff2dad4d3514d6cda61d02816dba34fa8c3c2e" 23 | ], 24 | "markers": "python_full_version >= '3.6.2'", 25 | "version": "==3.5.0" 26 | }, 27 | "certifi": { 28 | "hashes": [ 29 | "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", 30 | "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" 31 | ], 32 | "version": "==2021.10.8" 33 | }, 34 | "charset-normalizer": { 35 | "hashes": [ 36 | "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", 37 | "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" 38 | ], 39 | "markers": "python_version >= '3'", 40 | "version": "==2.0.12" 41 | }, 42 | "click": { 43 | "hashes": [ 44 | "sha256:5e0d195c2067da3136efb897449ec1e9e6c98282fbf30d7f9e164af9be901a6b", 45 | "sha256:7ab900e38149c9872376e8f9b5986ddcaf68c0f413cf73678a0bca5547e6f976" 46 | ], 47 | "index": "pypi", 48 | "version": "==8.1.1" 49 | }, 50 | "dateparser": { 51 | "hashes": [ 52 | "sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9", 53 | "sha256:9600874312ff28a41f96ec7ccdc73be1d1c44435719da47fea3339d55ff5a628" 54 | ], 55 | "index": "pypi", 56 | "version": "==1.1.1" 57 | }, 58 | "datetime": { 59 | "hashes": [ 60 | "sha256:074b0c63d4328f4de30662786fa436a0567145b2ab02f47be5af68f58afc03ec", 61 | "sha256:4fcca115ddb466a1104df08d5c2a2f5805d14ca317800e1bfcea8f3d69f66e57" 62 | ], 63 | "index": "pypi", 64 | "version": "==4.4" 65 | }, 66 | "h11": { 67 | "hashes": [ 68 | "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6", 69 | "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042" 70 | ], 71 | "markers": "python_version >= '3.6'", 72 | "version": "==0.12.0" 73 | }, 74 | "httpcore": { 75 | "hashes": [ 76 | "sha256:47d772f754359e56dd9d892d9593b6f9870a37aeb8ba51e9a88b09b3d68cfade", 77 | "sha256:7503ec1c0f559066e7e39bc4003fd2ce023d01cf51793e3c173b864eb456ead1" 78 | ], 79 | "markers": "python_version >= '3.6'", 80 | "version": "==0.14.7" 81 | }, 82 | "httpx": { 83 | "hashes": [ 84 | 
"sha256:d8e778f76d9bbd46af49e7f062467e3157a5a3d2ae4876a4bbfd8a51ed9c9cb4", 85 | "sha256:e35e83d1d2b9b2a609ef367cc4c1e66fd80b750348b20cc9e19d1952fc2ca3f6" 86 | ], 87 | "markers": "python_version >= '3.6'", 88 | "version": "==0.22.0" 89 | }, 90 | "idna": { 91 | "hashes": [ 92 | "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", 93 | "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" 94 | ], 95 | "version": "==3.3" 96 | }, 97 | "kindle2notion": { 98 | "editable": true, 99 | "path": "." 100 | }, 101 | "notion-client": { 102 | "hashes": [ 103 | "sha256:5a11b5fdbf00210ff256c507a7b0c7aa2842c076b22636dbb779ed9240e40e0d", 104 | "sha256:7438848aa0de81bf2183524ba5cc6caf5b9cba84e1da5051c822ecb31de89b00" 105 | ], 106 | "index": "pypi", 107 | "version": "==0.9.0" 108 | }, 109 | "pathlib": { 110 | "hashes": [ 111 | "sha256:6940718dfc3eff4258203ad5021090933e5c04707d5ca8cc9e73c94a7894ea9f" 112 | ], 113 | "index": "pypi", 114 | "version": "==1.0.1" 115 | }, 116 | "python-dateutil": { 117 | "hashes": [ 118 | "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", 119 | "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" 120 | ], 121 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", 122 | "version": "==2.8.2" 123 | }, 124 | "python-decouple": { 125 | "hashes": [ 126 | "sha256:2838cdf77a5cf127d7e8b339ce14c25bceb3af3e674e039d4901ba16359968c7", 127 | "sha256:6cf502dc963a5c642ea5ead069847df3d916a6420cad5599185de6bab11d8c2e" 128 | ], 129 | "index": "pypi", 130 | "version": "==3.6" 131 | }, 132 | "pytz": { 133 | "hashes": [ 134 | "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", 135 | "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" 136 | ], 137 | "version": "==2022.1" 138 | }, 139 | "pytz-deprecation-shim": { 140 | "hashes": [ 141 | "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6", 142 | "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d" 143 | ], 144 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", 145 | "version": "==0.1.0.post0" 146 | }, 147 | "regex": { 148 | "hashes": [ 149 | "sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14", 150 | "sha256:03299b0bcaa7824eb7c0ebd7ef1e3663302d1b533653bfe9dc7e595d453e2ae9", 151 | "sha256:06b1df01cf2aef3a9790858af524ae2588762c8a90e784ba00d003f045306204", 152 | "sha256:09b4b6ccc61d4119342b26246ddd5a04accdeebe36bdfe865ad87a0784efd77f", 153 | "sha256:0be0c34a39e5d04a62fd5342f0886d0e57592a4f4993b3f9d257c1f688b19737", 154 | "sha256:0d96eec8550fd2fd26f8e675f6d8b61b159482ad8ffa26991b894ed5ee19038b", 155 | "sha256:0eb0e2845e81bdea92b8281a3969632686502565abf4a0b9e4ab1471c863d8f3", 156 | "sha256:13bbf0c9453c6d16e5867bda7f6c0c7cff1decf96c5498318bb87f8136d2abd4", 157 | "sha256:17e51ad1e6131c496b58d317bc9abec71f44eb1957d32629d06013a21bc99cac", 158 | "sha256:1977bb64264815d3ef016625adc9df90e6d0e27e76260280c63eca993e3f455f", 159 | "sha256:1e30762ddddb22f7f14c4f59c34d3addabc789216d813b0f3e2788d7bcf0cf29", 160 | "sha256:1e73652057473ad3e6934944af090852a02590c349357b79182c1b681da2c772", 161 | "sha256:20e6a27959f162f979165e496add0d7d56d7038237092d1aba20b46de79158f1", 162 | "sha256:286ff9ec2709d56ae7517040be0d6c502642517ce9937ab6d89b1e7d0904f863", 163 | "sha256:297c42ede2c81f0cb6f34ea60b5cf6dc965d97fa6936c11fc3286019231f0d66", 164 | "sha256:320c2f4106962ecea0f33d8d31b985d3c185757c49c1fb735501515f963715ed", 165 | 
"sha256:35ed2f3c918a00b109157428abfc4e8d1ffabc37c8f9abc5939ebd1e95dabc47", 166 | "sha256:3d146e5591cb67c5e836229a04723a30af795ef9b70a0bbd913572e14b7b940f", 167 | "sha256:42bb37e2b2d25d958c25903f6125a41aaaa1ed49ca62c103331f24b8a459142f", 168 | "sha256:42d6007722d46bd2c95cce700181570b56edc0dcbadbfe7855ec26c3f2d7e008", 169 | "sha256:43eba5c46208deedec833663201752e865feddc840433285fbadee07b84b464d", 170 | "sha256:452519bc4c973e961b1620c815ea6dd8944a12d68e71002be5a7aff0a8361571", 171 | "sha256:4b9c16a807b17b17c4fa3a1d8c242467237be67ba92ad24ff51425329e7ae3d0", 172 | "sha256:5510932596a0f33399b7fff1bd61c59c977f2b8ee987b36539ba97eb3513584a", 173 | "sha256:55820bc631684172b9b56a991d217ec7c2e580d956591dc2144985113980f5a3", 174 | "sha256:57484d39447f94967e83e56db1b1108c68918c44ab519b8ecfc34b790ca52bf7", 175 | "sha256:58ba41e462653eaf68fc4a84ec4d350b26a98d030be1ab24aba1adcc78ffe447", 176 | "sha256:5bc5f921be39ccb65fdda741e04b2555917a4bced24b4df14eddc7569be3b493", 177 | "sha256:5dcc4168536c8f68654f014a3db49b6b4a26b226f735708be2054314ed4964f4", 178 | "sha256:5f92a7cdc6a0ae2abd184e8dfd6ef2279989d24c85d2c85d0423206284103ede", 179 | "sha256:67250b36edfa714ba62dc62d3f238e86db1065fccb538278804790f578253640", 180 | "sha256:6df070a986fc064d865c381aecf0aaff914178fdf6874da2f2387e82d93cc5bd", 181 | "sha256:729aa8ca624c42f309397c5fc9e21db90bf7e2fdd872461aabdbada33de9063c", 182 | "sha256:72bc3a5effa5974be6d965ed8301ac1e869bc18425c8a8fac179fbe7876e3aee", 183 | "sha256:74d86e8924835f863c34e646392ef39039405f6ce52956d8af16497af4064a30", 184 | "sha256:79e5af1ff258bc0fe0bdd6f69bc4ae33935a898e3cbefbbccf22e88a27fa053b", 185 | "sha256:7b103dffb9f6a47ed7ffdf352b78cfe058b1777617371226c1894e1be443afec", 186 | "sha256:83f03f0bd88c12e63ca2d024adeee75234d69808b341e88343b0232329e1f1a1", 187 | "sha256:86d7a68fa53688e1f612c3246044157117403c7ce19ebab7d02daf45bd63913e", 188 | "sha256:878c626cbca3b649e14e972c14539a01191d79e58934e3f3ef4a9e17f90277f8", 189 | "sha256:878f5d649ba1db9f52cc4ef491f7dba2d061cdc48dd444c54260eebc0b1729b9", 190 | "sha256:87bc01226cd288f0bd9a4f9f07bf6827134dc97a96c22e2d28628e824c8de231", 191 | "sha256:8babb2b5751105dc0aef2a2e539f4ba391e738c62038d8cb331c710f6b0f3da7", 192 | "sha256:91e0f7e7be77250b808a5f46d90bf0032527d3c032b2131b63dee54753a4d729", 193 | "sha256:9557545c10d52c845f270b665b52a6a972884725aa5cf12777374e18f2ea8960", 194 | "sha256:9ccb0a4ab926016867260c24c192d9df9586e834f5db83dfa2c8fffb3a6e5056", 195 | "sha256:9d828c5987d543d052b53c579a01a52d96b86f937b1777bbfe11ef2728929357", 196 | "sha256:9efa41d1527b366c88f265a227b20bcec65bda879962e3fc8a2aee11e81266d7", 197 | "sha256:aaf5317c961d93c1a200b9370fb1c6b6836cc7144fef3e5a951326912bf1f5a3", 198 | "sha256:ab69b4fe09e296261377d209068d52402fb85ef89dc78a9ac4a29a895f4e24a7", 199 | "sha256:ad397bc7d51d69cb07ef89e44243f971a04ce1dca9bf24c992c362406c0c6573", 200 | "sha256:ae17fc8103f3b63345709d3e9654a274eee1c6072592aec32b026efd401931d0", 201 | "sha256:af4d8cc28e4c7a2f6a9fed544228c567340f8258b6d7ea815b62a72817bbd178", 202 | "sha256:b22ff939a8856a44f4822da38ef4868bd3a9ade22bb6d9062b36957c850e404f", 203 | "sha256:b549d851f91a4efb3e65498bd4249b1447ab6035a9972f7fc215eb1f59328834", 204 | "sha256:be319f4eb400ee567b722e9ea63d5b2bb31464e3cf1b016502e3ee2de4f86f5c", 205 | "sha256:c0446b2871335d5a5e9fcf1462f954586b09a845832263db95059dcd01442015", 206 | "sha256:c68d2c04f7701a418ec2e5631b7f3552efc32f6bcc1739369c6eeb1af55f62e0", 207 | "sha256:c87ac58b9baaf50b6c1b81a18d20eda7e2883aa9a4fb4f1ca70f2e443bfcdc57", 208 | 
"sha256:caa2734ada16a44ae57b229d45091f06e30a9a52ace76d7574546ab23008c635", 209 | "sha256:cb34c2d66355fb70ae47b5595aafd7218e59bb9c00ad8cc3abd1406ca5874f07", 210 | "sha256:cb3652bbe6720786b9137862205986f3ae54a09dec8499a995ed58292bdf77c2", 211 | "sha256:cf668f26604e9f7aee9f8eaae4ca07a948168af90b96be97a4b7fa902a6d2ac1", 212 | "sha256:d326ff80ed531bf2507cba93011c30fff2dd51454c85f55df0f59f2030b1687b", 213 | "sha256:d6c2441538e4fadd4291c8420853431a229fcbefc1bf521810fbc2629d8ae8c2", 214 | "sha256:d6ecfd1970b3380a569d7b3ecc5dd70dba295897418ed9e31ec3c16a5ab099a5", 215 | "sha256:e5602a9b5074dcacc113bba4d2f011d2748f50e3201c8139ac5b68cf2a76bd8b", 216 | "sha256:ef806f684f17dbd6263d72a54ad4073af42b42effa3eb42b877e750c24c76f86", 217 | "sha256:f3356afbb301ec34a500b8ba8b47cba0b44ed4641c306e1dd981a08b416170b5", 218 | "sha256:f6f7ee2289176cb1d2c59a24f50900f8b9580259fa9f1a739432242e7d254f93", 219 | "sha256:f7e8f1ee28e0a05831c92dc1c0c1c94af5289963b7cf09eca5b5e3ce4f8c91b0", 220 | "sha256:f8169ec628880bdbca67082a9196e2106060a4a5cbd486ac51881a4df805a36f", 221 | "sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d", 222 | "sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4" 223 | ], 224 | "markers": "python_version >= '3.6'", 225 | "version": "==2022.3.2" 226 | }, 227 | "requests": { 228 | "hashes": [ 229 | "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", 230 | "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" 231 | ], 232 | "index": "pypi", 233 | "version": "==2.27.1" 234 | }, 235 | "rfc3986": { 236 | "extras": [ 237 | "idna2008" 238 | ], 239 | "hashes": [ 240 | "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835", 241 | "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97" 242 | ], 243 | "version": "==1.5.0" 244 | }, 245 | "six": { 246 | "hashes": [ 247 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", 248 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" 249 | ], 250 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", 251 | "version": "==1.16.0" 252 | }, 253 | "sniffio": { 254 | "hashes": [ 255 | "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663", 256 | "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de" 257 | ], 258 | "markers": "python_version >= '3.5'", 259 | "version": "==1.2.0" 260 | }, 261 | "tzdata": { 262 | "hashes": [ 263 | "sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9", 264 | "sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3" 265 | ], 266 | "markers": "python_version >= '3.6'", 267 | "version": "==2022.1" 268 | }, 269 | "tzlocal": { 270 | "hashes": [ 271 | "sha256:0f28015ac68a5c067210400a9197fc5d36ba9bc3f8eaf1da3cbd59acdfed9e09", 272 | "sha256:28ba8d9fcb6c9a782d6e0078b4f6627af1ea26aeaa32b4eab5324abc7df4149f" 273 | ], 274 | "markers": "python_version >= '3.6'", 275 | "version": "==4.1" 276 | }, 277 | "urllib3": { 278 | "hashes": [ 279 | "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", 280 | "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" 281 | ], 282 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 283 | "version": "==1.26.9" 284 | }, 285 | "zope.interface": { 286 | "hashes": [ 287 | "sha256:08f9636e99a9d5410181ba0729e0408d3d8748026ea938f3b970a0249daa8192", 288 | 
"sha256:0b465ae0962d49c68aa9733ba92a001b2a0933c317780435f00be7ecb959c702", 289 | "sha256:0cba8477e300d64a11a9789ed40ee8932b59f9ee05f85276dbb4b59acee5dd09", 290 | "sha256:0cee5187b60ed26d56eb2960136288ce91bcf61e2a9405660d271d1f122a69a4", 291 | "sha256:0ea1d73b7c9dcbc5080bb8aaffb776f1c68e807767069b9ccdd06f27a161914a", 292 | "sha256:0f91b5b948686659a8e28b728ff5e74b1be6bf40cb04704453617e5f1e945ef3", 293 | "sha256:15e7d1f7a6ee16572e21e3576d2012b2778cbacf75eb4b7400be37455f5ca8bf", 294 | "sha256:17776ecd3a1fdd2b2cd5373e5ef8b307162f581c693575ec62e7c5399d80794c", 295 | "sha256:194d0bcb1374ac3e1e023961610dc8f2c78a0f5f634d0c737691e215569e640d", 296 | "sha256:1c0e316c9add0db48a5b703833881351444398b04111188069a26a61cfb4df78", 297 | "sha256:205e40ccde0f37496904572035deea747390a8b7dc65146d30b96e2dd1359a83", 298 | "sha256:273f158fabc5ea33cbc936da0ab3d4ba80ede5351babc4f577d768e057651531", 299 | "sha256:2876246527c91e101184f63ccd1d716ec9c46519cc5f3d5375a3351c46467c46", 300 | "sha256:2c98384b254b37ce50eddd55db8d381a5c53b4c10ee66e1e7fe749824f894021", 301 | "sha256:2e5a26f16503be6c826abca904e45f1a44ff275fdb7e9d1b75c10671c26f8b94", 302 | "sha256:334701327f37c47fa628fc8b8d28c7d7730ce7daaf4bda1efb741679c2b087fc", 303 | "sha256:3748fac0d0f6a304e674955ab1365d515993b3a0a865e16a11ec9d86fb307f63", 304 | "sha256:3c02411a3b62668200910090a0dff17c0b25aaa36145082a5a6adf08fa281e54", 305 | "sha256:3dd4952748521205697bc2802e4afac5ed4b02909bb799ba1fe239f77fd4e117", 306 | "sha256:3f24df7124c323fceb53ff6168da70dbfbae1442b4f3da439cd441681f54fe25", 307 | "sha256:469e2407e0fe9880ac690a3666f03eb4c3c444411a5a5fddfdabc5d184a79f05", 308 | "sha256:4de4bc9b6d35c5af65b454d3e9bc98c50eb3960d5a3762c9438df57427134b8e", 309 | "sha256:5208ebd5152e040640518a77827bdfcc73773a15a33d6644015b763b9c9febc1", 310 | "sha256:52de7fc6c21b419078008f697fd4103dbc763288b1406b4562554bd47514c004", 311 | "sha256:5bb3489b4558e49ad2c5118137cfeaf59434f9737fa9c5deefc72d22c23822e2", 312 | "sha256:5dba5f530fec3f0988d83b78cc591b58c0b6eb8431a85edd1569a0539a8a5a0e", 313 | "sha256:5dd9ca406499444f4c8299f803d4a14edf7890ecc595c8b1c7115c2342cadc5f", 314 | "sha256:5f931a1c21dfa7a9c573ec1f50a31135ccce84e32507c54e1ea404894c5eb96f", 315 | "sha256:63b82bb63de7c821428d513607e84c6d97d58afd1fe2eb645030bdc185440120", 316 | "sha256:66c0061c91b3b9cf542131148ef7ecbecb2690d48d1612ec386de9d36766058f", 317 | "sha256:6f0c02cbb9691b7c91d5009108f975f8ffeab5dff8f26d62e21c493060eff2a1", 318 | "sha256:71aace0c42d53abe6fc7f726c5d3b60d90f3c5c055a447950ad6ea9cec2e37d9", 319 | "sha256:7d97a4306898b05404a0dcdc32d9709b7d8832c0c542b861d9a826301719794e", 320 | "sha256:7df1e1c05304f26faa49fa752a8c690126cf98b40b91d54e6e9cc3b7d6ffe8b7", 321 | "sha256:8270252effc60b9642b423189a2fe90eb6b59e87cbee54549db3f5562ff8d1b8", 322 | "sha256:867a5ad16892bf20e6c4ea2aab1971f45645ff3102ad29bd84c86027fa99997b", 323 | "sha256:877473e675fdcc113c138813a5dd440da0769a2d81f4d86614e5d62b69497155", 324 | "sha256:8892f89999ffd992208754851e5a052f6b5db70a1e3f7d54b17c5211e37a98c7", 325 | "sha256:9a9845c4c6bb56e508651f005c4aeb0404e518c6f000d5a1123ab077ab769f5c", 326 | "sha256:a1e6e96217a0f72e2b8629e271e1b280c6fa3fe6e59fa8f6701bec14e3354325", 327 | "sha256:a8156e6a7f5e2a0ff0c5b21d6bcb45145efece1909efcbbbf48c56f8da68221d", 328 | "sha256:a9506a7e80bcf6eacfff7f804c0ad5350c8c95b9010e4356a4b36f5322f09abb", 329 | "sha256:af310ec8335016b5e52cae60cda4a4f2a60a788cbb949a4fbea13d441aa5a09e", 330 | "sha256:b0297b1e05fd128d26cc2460c810d42e205d16d76799526dfa8c8ccd50e74959", 331 | 
"sha256:bf68f4b2b6683e52bec69273562df15af352e5ed25d1b6641e7efddc5951d1a7", 332 | "sha256:d0c1bc2fa9a7285719e5678584f6b92572a5b639d0e471bb8d4b650a1a910920", 333 | "sha256:d4d9d6c1a455d4babd320203b918ccc7fcbefe308615c521062bc2ba1aa4d26e", 334 | "sha256:db1fa631737dab9fa0b37f3979d8d2631e348c3b4e8325d6873c2541d0ae5a48", 335 | "sha256:dd93ea5c0c7f3e25335ab7d22a507b1dc43976e1345508f845efc573d3d779d8", 336 | "sha256:f44e517131a98f7a76696a7b21b164bcb85291cee106a23beccce454e1f433a4", 337 | "sha256:f7ee479e96f7ee350db1cf24afa5685a5899e2b34992fb99e1f7c1b0b758d263" 338 | ], 339 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 340 | "version": "==5.4.0" 341 | } 342 | }, 343 | "develop": {} 344 | } 345 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |

4 | 5 |

6 | 7 |

8 | A program to copy all your Kindle highlights and notes to a page in Notion. 9 |
10 | Explore the docs 11 | · 12 | File issues and feature requests here 13 |

14 |

15 | If you found this script helpful or appreciate my work, you can support me here: 16 |

17 | Kindle2Notion - Export your Kindle clippings to a Notion database. | Product Hunt 18 | Buy Me A Coffee 19 |

20 | 21 | [![Contributors][contributors-shield]][contributors-url] 22 | [![Forks][forks-shield]][forks-url] 23 | [![Stargazers][stars-shield]][stars-url] 24 | [![Issues][issues-shield]][issues-url] 25 | [![MIT License][license-shield]][license-url] 26 | [![LinkedIn][linkedin-shield]][linkedin-url] 27 | 28 | 29 | ## Table of Contents 30 | 31 | - [Table of Contents](#table-of-contents) 32 | - [About The Project](#about-the-project) 33 | - [Getting Started](#getting-started) 34 | - [Prerequisites](#prerequisites) 35 | - [Installation](#installation) 36 | - [Usage](#usage) 37 | - [Roadmap](#roadmap) 38 | - [Contributing](#contributing) 39 | - [License](#license) 40 | - [Contact](#contact) 41 | 42 | 43 | 44 | 45 | ## About The Project 46 | 47 | ![Kindle2Notion Demo][product-demo] 48 | 49 | A Python package to export all the clippings from your Kindle device to a page in Notion. Run this script whenever you plug your Kindle device into your PC. 50 | 51 | A key inspiration behind this project was the note-saving feature on Google Play Books, which automatically syncs all your highlights from a book hosted on the service to a Google Doc in real time. I wanted a similar feature for my Kindle, and this project is one step towards solving that problem. 52 | 53 | **Intended for** 54 | - Avid readers who want to browse through their past reads and highlights anytime, anywhere. 55 | - Readers who take notes alongside their highlights. 56 | 57 | 58 | 59 | ## Getting Started 60 | 61 | 62 | > **NOTE** 63 | > Need a step-by-step guide to setting this package up? Click [here](https://www.notion.so/kindle2notion/Kindle2Notion-8a9683c9b19546c3b1cf42a68aceebee) for the full guide. 64 | 65 | To get a local copy up and running, follow these simple steps. 66 | 67 | ### Prerequisites 68 | 69 | * A Kindle device. 70 | * A Notion account to store your clippings. 71 | * Python 3 on your system to run the code. 72 | 73 | ### Installation 74 | 75 | 1. Install the library. 76 | ```sh 77 | pip install kindle2notion 78 | ``` 79 | 2. Export your Kindle highlights and notes to Notion! 80 | - On macOS and UNIX, 81 | ```sh 82 | kindle2notion 'your_notion_token' 'your_notion_table_id' 'your_kindle_clippings_file' 83 | ``` 84 | - On Windows, 85 | ```sh 86 | python -m kindle2notion 'your_notion_token' 'your_notion_table_id' 'your_kindle_clippings_file' 87 | ``` 88 | 89 | 90 | 91 | 92 | ## Usage 93 | 94 | 1. Plug your Kindle device into your PC. 95 | 96 | 2. Duplicate this [database template](https://www.notion.so/kindle2notion/6d26062e3bb04dd89b988806978c1fe7?v=0d394a8162cc481280966b35a37465c2) to your Notion workspace. 97 | 98 | 3. Find your Notion token. Since this code requires access to non-public pages, an authentication token from your Notion page is required. This token is stored in the `token_v2` cookie, which can be found in the *Storage* tab of your browser's developer tools. 99 | - For Chrome: open Developer Tools (*Menu > More tools > Developer Tools*), navigate to the *Application* tab and go to *Storage > Cookies* to find the token listed next to *token_v2*. 100 | 101 | 4. Find your Notion table ID: it's the ID in the URL of the database you have duplicated to your workspace. 102 | 103 | 5. You can modify some of the CLI's default behaviour with the following options: 104 | - ```--enable_highlight_date``` Set to False if you don't want to see the "Date Added" information in Notion. 105 | - ```--enable_book_cover``` Set to False if you don't want to store the book cover in Notion. 106 | 107 | 6.
Export your Kindle highlights and notes to Notion! 108 | - On macOS and UNIX, 109 | ```sh 110 | kindle2notion 'your_notion_token' 'your_notion_table_id' 'your_kindle_clippings_file' 111 | ``` 112 | - On Windows, 113 | ```sh 114 | python -m kindle2notion 'your_notion_token' 'your_notion_table_id' 'your_kindle_clippings_file' 115 | ``` 116 | You can also display the built-in help with either of the following commands: 117 | ```sh 118 | kindle2notion --help 119 | python -m kindle2notion --help 120 | ``` 121 | 122 | [**Note:** This code has been tested on a 4th Gen Kindle Paperwhite on both macOS and Windows.] 123 | 124 | 125 | 126 | ## Roadmap 127 | 128 | See the [open issues](https://github.com/paperboi/Kindle2Notion/issues) for a list of proposed features (and known issues). 129 | 130 | 131 | 132 | 133 | ## Contributing 134 | 135 | 136 | Any contributions you make are **greatly appreciated**. 137 | 138 | 1. Fork the Project 139 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`) 140 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`) 141 | 4. Push to the Branch (`git push origin feature/AmazingFeature`) 142 | 5. Open a Pull Request 143 | 144 | 145 | 146 | 147 | ## License 148 | 149 | Distributed under the MIT License. See [LICENSE][license-url] for more information. 150 | 151 | 152 | 153 | 154 | ## Contact 155 | 156 | Jeffrey Jacob ([Twitter](https://twitter.com/jeffreysamjacob) | [Email](mailto:jeffreysamjacob@gmail.com) | [LinkedIn](https://www.linkedin.com/in/jeffreysamjacob/)) 157 | 158 | 159 | 160 | [contributors-shield]: https://img.shields.io/github/contributors/paperboi/Kindle2Notion.svg?style=flat-square 161 | [contributors-url]: https://github.com/paperboi/Kindle2Notion/graphs/contributors 162 | [forks-shield]: https://img.shields.io/github/forks/paperboi/Kindle2Notion.svg?style=flat-square 163 | [forks-url]: https://github.com/paperboi/Kindle2Notion/network/members 164 | [stars-shield]: https://img.shields.io/github/stars/paperboi/Kindle2Notion.svg?style=flat-square 165 | [stars-url]: https://github.com/paperboi/Kindle2Notion/stargazers 166 | [issues-shield]: https://img.shields.io/github/issues/paperboi/Kindle2Notion.svg?style=flat-square 167 | [issues-url]: https://github.com/paperboi/Kindle2Notion/issues 168 | [license-shield]: https://img.shields.io/github/license/paperboi/Kindle2Notion.svg?style=flat-square 169 | [license-url]: https://github.com/paperboi/kindle2notion/blob/master/LICENSE 170 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=flat-square&logo=linkedin&colorB=555 171 | [linkedin-url]: https://www.linkedin.com/in/jeffreysamjacob/ 172 | [product-demo]: https://i.imgur.com/IlDmEOy.gif 173 | -------------------------------------------------------------------------------- /kindle2notion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wsine/kindle2notion/b1e54b93c1129b2f287d92449fc24dfde81f1d7d/kindle2notion/__init__.py -------------------------------------------------------------------------------- /kindle2notion/__main__.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from kindle2notion.exporting import export_to_notion 4 | from kindle2notion.parsing import parse_raw_clippings_text 5 | from kindle2notion.reading import read_raw_clippings 6 | 7 | from notion_client import Client 8 | 9 | @click.command() 10 | @click.argument("notion_token") 11 | @click.argument("notion_table_id") 12 |
@click.argument("clippings_file_path") 13 | @click.option( 14 | "--enable_highlight_date", 15 | default=True, 16 | help='Set to False if you don\'t want to see the "Date Added" information in Notion.', 17 | ) 18 | @click.option( 19 | "--enable_book_cover", 20 | default=True, 21 | help="Set to False if you don't want to store the book cover in Notion.", 22 | ) 23 | def main( 24 | notion_token, 25 | notion_table_id, 26 | clippings_file_path, 27 | enable_highlight_date, 28 | enable_book_cover, 29 | ): 30 | 31 | notion_client = Client(auth=notion_token) 32 | notion_collection_view = notion_client.databases.retrieve(notion_table_id) 33 | 34 | if len(notion_collection_view) > 0: 35 | print("Notion page is found. Analyzing clippings file...") 36 | all_clippings = read_raw_clippings(clippings_file_path) 37 | books = parse_raw_clippings_text(all_clippings) 38 | export_to_notion( 39 | books, 40 | enable_highlight_date, 41 | enable_book_cover, 42 | notion_token, 43 | notion_table_id, 44 | ) 45 | print("Transfer complete... Exiting script...") 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /kindle2notion/exporting.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Dict, List, Tuple 3 | from dateparser import parse 4 | from dateutil.tz import tzlocal 5 | 6 | from notion_client import Client 7 | from requests import get 8 | 9 | NO_COVER_IMG = "https://via.placeholder.com/150x200?text=No%20Cover" 10 | ITALIC = "*" 11 | BOLD = "**" 12 | 13 | # TODO: Refactor this module 14 | 15 | def export_to_notion( 16 | books: Dict, 17 | enable_highlight_date: bool, 18 | enable_book_cover: bool, 19 | notion_token: str, 20 | notion_table_id: str, 21 | ) -> None: 22 | print("Initiating transfer...\n") 23 | 24 | for title in books: 25 | print("Checking book: " + title) 26 | 27 | book = books[title] 28 | author = book["author"] 29 | highlights = book["highlights"] 30 | highlight_count = len(highlights) 31 | ( 32 | aggregated_text_from_highlights, 33 | last_date, 34 | ) = _prepare_aggregated_text_for_one_book(highlights, enable_highlight_date) 35 | message = _add_book_to_notion( 36 | title, 37 | author, 38 | highlight_count, 39 | aggregated_text_from_highlights, 40 | last_date, 41 | notion_token, 42 | notion_table_id, 43 | enable_book_cover, 44 | ) 45 | if message != "None to add": 46 | print("✓", message) 47 | 48 | def _prepare_aggregated_text_for_one_book( 49 | highlights: List, enable_highlight_date: bool 50 | ) -> Tuple[str, str]: 51 | aggregated_text = "" 52 | for highlight in highlights: 53 | text = highlight[0] 54 | page = highlight[1] 55 | location = highlight[2] 56 | date = highlight[3] 57 | isNote = highlight[4] 58 | if isNote == True: 59 | aggregated_text += BOLD + "Note: " + BOLD 60 | 61 | aggregated_text += text + "\n(" 62 | if page != "": 63 | aggregated_text += "Page: " + page + " " 64 | if location != "": 65 | aggregated_text += "Location: " + location + " " 66 | if enable_highlight_date and (date != ""): 67 | aggregated_text += "Date Added: " + date 68 | 69 | aggregated_text = aggregated_text.strip() + ")\n\n" 70 | last_date = date 71 | return aggregated_text, last_date 72 | 73 | 74 | def _add_book_to_notion( 75 | title: str, 76 | author: str, 77 | highlight_count: int, 78 | aggregated_text: str, 79 | last_date: str, 80 | notion_token: str, 81 | notion_table_id: str, 82 | enable_book_cover: bool, 83 | ) -> str: 84 | 
notion_client = Client(auth=notion_token) 85 | notion_books_database = notion_client.databases.retrieve(notion_table_id) 86 | notion_books = notion_client.databases.query(notion_books_database['id']).get('results') 87 | 88 | title_exists = False 89 | if notion_books: 90 | for c_row in notion_books: 91 | book_info = c_row.get('properties') 92 | if title == book_info['Title']['title'][0]['plain_text']: 93 | title_exists = True 94 | row = c_row 95 | 96 | if row['properties']['Highlights']['number'] is None: 97 | row['properties']['Highlights']['number'] = 0 98 | elif row['properties']['Highlights']['number'] == highlight_count: 99 | return "None to add" 100 | 101 | 102 | title_and_author = title + " (" + str(author) + ")" 103 | print(title_and_author) 104 | print("-" * len(title_and_author)) 105 | 106 | if not title_exists: 107 | new_page = { 108 | "Title": {"title": [{"text": {"content": title}}]}, 109 | "Author": { 110 | "type": "rich_text", 111 | "rich_text": [ 112 | { 113 | "type": "text", 114 | "text": {"content": author}, 115 | } 116 | ], 117 | }, 118 | "Highlights": {"type": "number", "number": 0}, 119 | } 120 | row = notion_client.pages.create(parent={"database_id": notion_table_id}, properties=new_page) 121 | 122 | 123 | parent_page = notion_client.pages.retrieve(row['id']) 124 | 125 | for all_blocks in notion_client.blocks.children.list(parent_page['id'])['results']: 126 | notion_client.blocks.delete(all_blocks['id']) 127 | 128 | # Split aggregated_text into paragraphs 129 | chunk_size = 1500 130 | chunks = [{'type': 'text', 'text': {'content': aggregated_text[i:i+chunk_size]}} for i in range(0, len(aggregated_text), chunk_size)] 131 | 132 | new_block = { 133 | 'object': 'block', 134 | 'type': 'paragraph', 135 | 'paragraph': { 136 | 'text': chunks, 137 | } 138 | } 139 | notion_client.blocks.children.append(block_id=parent_page['id'], children=[new_block]) 140 | 141 | diff_count = highlight_count - (row['properties']['Highlights']['number'] or 0) 142 | updated_info = { 143 | "Highlights": {"type": "number", "number": highlight_count}, 144 | # "Last Highlighted": {"type": "date", "date": {'start': parse(last_date).replace(tzinfo=tzlocal()).isoformat()}}, 145 | "Last Synced": {"type": "date", "date": {'start': datetime.now(tzlocal()).isoformat()}}, 146 | } 147 | notion_client.pages.update(page_id=row['id'], properties=updated_info) 148 | 149 | message = str(diff_count) + " notes / highlights added successfully\n" 150 | return message 151 | -------------------------------------------------------------------------------- /kindle2notion/parsing.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, List, Tuple 3 | 4 | from dateparser import parse 5 | 6 | ACADEMIC_TITLES = [ 7 | "A.A.", 8 | "A.S.", 9 | "A.A.A.", 10 | "A.A.S.", 11 | "A.B.", 12 | "A.D.N.", 13 | "A.M.", 14 | "A.M.T.", 15 | "C.E.", 16 | "Ch.E.", 17 | "D.A.", 18 | "D.A.S.", 19 | "D.B.A.", 20 | "D.C.", 21 | "D.D.", 22 | "D.Ed.", 23 | "D.L.S.", 24 | "D.M.D.", 25 | "D.M.S.", 26 | "D.P.A.", 27 | "D.P.H.", 28 | "D.R.E.", 29 | "D.S.W.", 30 | "D.Sc.", 31 | "D.V.M.", 32 | "Ed.D.", 33 | "Ed.S.", 34 | "E.E.", 35 | "E.M.", 36 | "E.Met.", 37 | "I.E.", 38 | "J.D.", 39 | "J.S.D.", 40 | "L.H.D.", 41 | "Litt.B.", 42 | "Litt.M.", 43 | "LL.B.", 44 | "LL.D.", 45 | "LL.M.", 46 | "M.A.", 47 | "M.Aero.E.", 48 | "M.B.A.", 49 | "M.C.S.", 50 | "M.D.", 51 | "M.Div.", 52 | "M.E.", 53 | "M.Ed.", 54 | "M.Eng.", 55 | "M.F.A.", 56 | "M.H.A.", 57 | "M.L.S.", 58 | "M.Mus.", 59 | "M.N.", 
60 | "M.P.A.", 61 | "M.S.", 62 | "M.S.Ed.", 63 | "M.S.W.", 64 | "M.Th.", 65 | "Nuc.E.", 66 | "O.D.", 67 | "Pharm.D.", 68 | "Ph.B.", 69 | "Ph.D.", 70 | "S.B.", 71 | "Sc.D.", 72 | "S.J.D.", 73 | "S.Sc.D.", 74 | "Th.B.", 75 | "Th.D.", 76 | "Th.M.", 77 | ] 78 | 79 | DELIMITERS = ["; ", " & ", " and "] 80 | 81 | 82 | def parse_raw_clippings_text(raw_clippings_text: str) -> Dict: 83 | raw_clippings_list = raw_clippings_text.split("==========") 84 | print(f"Found {len(raw_clippings_list)} notes and highlights.\n") 85 | 86 | books = {} 87 | passed_clippings_count = 0 88 | 89 | for raw_clipping in raw_clippings_list: 90 | raw_clipping_list = raw_clipping.strip().split("\n") 91 | 92 | if _is_valid_clipping(raw_clipping_list): 93 | author, title = _parse_author_and_title(raw_clipping_list) 94 | page, location, date, isNote = _parse_page_location_date_and_note( 95 | raw_clipping_list 96 | ) 97 | highlight = raw_clipping_list[3] 98 | 99 | books = _add_parsed_items_to_books_dict( 100 | books, title, author, highlight, page, location, date, isNote 101 | ) 102 | else: 103 | passed_clippings_count += 1 104 | 105 | print(f"× Passed {passed_clippings_count} bookmarks or unsupported clippings.\n") 106 | return books 107 | 108 | 109 | def _is_valid_clipping(raw_clipping_list: List) -> bool: 110 | return len(raw_clipping_list) >= 3 111 | 112 | 113 | def _parse_author_and_title(raw_clipping_list: List) -> Tuple[str, str]: 114 | author, title = _parse_raw_author_and_title(raw_clipping_list) 115 | author, title = _deal_with_exceptions_in_author_name(author, title) 116 | title = _deal_with_exceptions_in_title(title) 117 | return author, title 118 | 119 | 120 | def _parse_page_location_date_and_note( 121 | raw_clipping_list: List, 122 | ) -> Tuple[str, str, str, bool]: 123 | second_line = raw_clipping_list[1] 124 | second_line_as_list = second_line.strip().split(" | ") 125 | page = location = date = "" 126 | isNote = False 127 | 128 | for element in second_line_as_list: 129 | element = element.lower() 130 | if "note" in element: 131 | isNote = True 132 | if "page" in element: 133 | page = element[element.find("page") :].replace("page", "").strip() 134 | if "location" in element: 135 | location = ( 136 | element[element.find("location") :].replace("location", "").strip() 137 | ) 138 | if "added on" in element: 139 | date = parse( 140 | element[element.find("added on") :].replace("added on", "").strip() 141 | ) 142 | date = date.strftime("%A, %d %B %Y %I:%M:%S %p") 143 | if "页" in element: 144 | page = element[element.find("第") : element.find("页") + 1].strip() 145 | if "位置" in element: 146 | location = element[element.find("位置") - 1 :].strip() 147 | if "添加于" in element: 148 | date = element 149 | 150 | 151 | return page, location, date, isNote 152 | 153 | 154 | def _add_parsed_items_to_books_dict( 155 | books: Dict, 156 | title: str, 157 | author: str, 158 | highlight: str, 159 | page: str, 160 | location: str, 161 | date: str, 162 | isNote: bool, 163 | ) -> Dict: 164 | if title not in books: 165 | books[title] = {"author": author, "highlights": []} 166 | books[title]["highlights"].append((highlight, page, location, date, isNote)) 167 | return books 168 | 169 | 170 | def _parse_raw_author_and_title(raw_clipping_list: List) -> Tuple[str, str]: 171 | author = "" 172 | title = raw_clipping_list[0] 173 | 174 | if re.findall(r"\(.*?\)", raw_clipping_list[0]): 175 | author = (re.findall(r"\(.*?\)", raw_clipping_list[0]))[-1] 176 | author = author.removeprefix("(").removesuffix(")") 177 | else: 178 | print( 179 | "No author 
found. You can manually add the Author details in the Notion database." 180 | ) 181 | 182 | title = raw_clipping_list[0].replace(author, "").strip().replace(" ()", "") 183 | 184 | return author, title 185 | 186 | 187 | def _deal_with_exceptions_in_author_name(author: str, title: str) -> Tuple[str, str]: 188 | if "(" in author: 189 | author = author + ")" 190 | title = title.removesuffix(")") 191 | 192 | if ", " in author and all(x not in author for x in DELIMITERS): 193 | if (author.split(", "))[1] not in ACADEMIC_TITLES: 194 | author = " ".join(reversed(author.split(", "))) 195 | 196 | if "; " in author: 197 | authorList = author.split("; ") 198 | author = "" 199 | for ele in authorList: 200 | author += " ".join(reversed(ele.split(", "))) + ", " 201 | author = author.removesuffix(", ") 202 | return author, title 203 | 204 | 205 | def _deal_with_exceptions_in_title(title: str) -> str: 206 | if ", The" in title: 207 | title = "The " + title.replace(", The", "") 208 | return title 209 | -------------------------------------------------------------------------------- /kindle2notion/reading.py: -------------------------------------------------------------------------------- 1 | import unicodedata 2 | from pathlib import Path 3 | 4 | 5 | def read_raw_clippings(clippings_file_path: Path) -> str: 6 | raw_clippings_text = open(clippings_file_path, "r", encoding="utf-8-sig").read() 7 | # raw_clippings_text = raw_clippings_text.encode("ascii", errors="ignore").decode() 8 | raw_clippings_text = unicodedata.normalize("NFKD", raw_clippings_text) 9 | return raw_clippings_text 10 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta:__legacy__" -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8>=3.9.2 2 | pytest>=6.2.4 3 | pytest-cov>=2.12.0 4 | black>=21.5b2 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click>=8.0.0 2 | dateparser>=1.0.0 3 | DateTime>=4.3 4 | python-decouple>=3.3 5 | requests>=2.25.0 6 | pathlib 7 | notion-client 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | .git, 4 | __pycache__, 5 | venv, 6 | idea, 7 | .venv 8 | max-line-length = 120 9 | inline-quotes = single 10 | multiline-quotes = ''' 11 | avoid-escape = True 12 | 13 | [tool:pytest] 14 | testpaths = tests/ 15 | norecursedirs = .git venv/ .pytest_cache/ main/ -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", "r", encoding="utf-8") as f: 4 | long_description = f.read() 5 | 6 | with open("requirements.txt", "r", encoding="utf-8") as f: 7 | requirements = f.read() 8 | 9 | with open("requirements-dev.txt", "r", encoding="utf-8") as f: 10 | requirements_dev = f.read() 11 | 12 | setup( 13 | name="kindle2notion", 14 | version="0.1.8", 15 | author="Jeffrey Jacob", 16 | 
author_email="jeffreysamjacob@gmail.com", 17 | description="Export all the clippings from your Kindle device to a database in Notion.", 18 | long_description=long_description, 19 | long_description_content_type="text/markdown", 20 | url="https://github.com/paperboi/kindle2notion", 21 | classifiers=[ 22 | "Programming Language :: Python :: 3", 23 | "License :: OSI Approved :: MIT License", 24 | "Operating System :: OS Independent", 25 | ], 26 | packages=find_packages(), 27 | install_requires=requirements, 28 | extras_require={"dev": requirements_dev}, 29 | python_requires=">=3.9", 30 | entry_points={ 31 | "console_scripts": [ 32 | "kindle2notion = kindle2notion.__main__:main", 33 | ], 34 | }, 35 | ) 36 | -------------------------------------------------------------------------------- /tests/test_data/Test Clippings.txt: -------------------------------------------------------------------------------- 1 | Title 1: A Great Book (Horowitz, Ben) 2 | - Your Highlight on page 11 | Location 111-114 | Added on Tuesday, September 22, 2020 9:23:48 AM 3 | 4 | This is test highlight 1. 5 | ========== 6 | Title 1: A Great Book (Horowitz, Ben) 7 | - Your Highlight on page 11 | Location 111-114 | Added on Tuesday, September 22, 2020 9:24:04 AM 8 | 9 | This is test highlight 2. 10 | ========== 11 | Title 2 Is Good Too (Bryar, Colin) 12 | - Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM 13 | 14 | This is test highlight 3. 15 | ========== 16 | Title 2 Is Good Too (Bryar, Colin) 17 | - Your Highlight on page 34 | Location 682-684 | Added on Friday, April 30, 2021 3:14:33 PM 18 | 19 | This is test highlight 4. 20 | ========== 21 | Title 3 Is Clean (Robert C. Martin Series) (C., Martin Robert) 22 | - Your Highlight on page 22 | Location 559-560 | Added on Saturday, May 15, 2021 10:25:42 PM 23 | 24 | This is test highlight 5. 25 | ========== 26 | Title 3 Is Clean (Robert C. Martin Series) (C., Martin Robert) 27 | - Your Highlight on page 22 | Location 564-565 | Added on Saturday, May 15, 2021 10:26:26 PM 28 | 29 | This is test highlight 6. 
30 | ========== -------------------------------------------------------------------------------- /tests/test_exporting.py: -------------------------------------------------------------------------------- 1 | from kindle2notion.exporting import _prepare_aggregated_text_for_one_book 2 | 3 | 4 | def test_prepare_aggregated_text_for_one_book_should_return_the_aggregated_text_when_highlight_date_is_disabled(): 5 | # Given 6 | highlights = [ 7 | ( 8 | "This is an example highlight.", 9 | "1", 10 | "100", 11 | "Thursday, 29 April 2021 12:31:29 AM", 12 | ), 13 | ( 14 | "This is a second example highlight.", 15 | "2", 16 | "200", 17 | "Friday, 30 April 2021 12:31:29 AM", 18 | ), 19 | ] 20 | 21 | expected = ( 22 | "This is an example highlight.\n(*Page: 1* *Location: 100*)\n\n" 23 | "This is a second example highlight.\n(*Page: 2* *Location: 200*)\n\n", 24 | "Friday, 30 April 2021 12:31:29 AM", 25 | ) 26 | 27 | # When 28 | actual = _prepare_aggregated_text_for_one_book( 29 | highlights, enable_highlight_date=False 30 | ) 31 | 32 | # Then 33 | assert expected == actual 34 | 35 | 36 | def test_prepare_aggregated_text_for_one_book_should_return_the_aggregated_text_when_highlight_date_is_enabled(): 37 | # Given 38 | highlights = [ 39 | ( 40 | "This is an example highlight.", 41 | "1", 42 | "100", 43 | "Thursday, 29 April 2021 12:31:29 AM", 44 | ), 45 | ( 46 | "This is a second example highlight.", 47 | "2", 48 | "200", 49 | "Friday, 30 April 2021 12:31:29 AM", 50 | ), 51 | ] 52 | 53 | expected = ( 54 | "This is an example highlight.\n" 55 | "(*Page: 1* *Location: 100* *Date Added: Thursday, 29 April 2021 12:31:29 AM*)\n\n" 56 | "This is a second example highlight.\n" 57 | "(*Page: 2* *Location: 200* *Date Added: Friday, 30 April 2021 12:31:29 AM*)\n\n", 58 | "Friday, 30 April 2021 12:31:29 AM", 59 | ) 60 | 61 | # When 62 | actual = _prepare_aggregated_text_for_one_book( 63 | highlights, enable_highlight_date=True 64 | ) 65 | 66 | # Then 67 | assert expected == actual 68 | -------------------------------------------------------------------------------- /tests/test_parsing.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from pathlib import Path 3 | 4 | from kindle2notion.parsing import ( 5 | parse_raw_clippings_text, 6 | _parse_author_and_title, 7 | _parse_page_location_date_and_note, 8 | _add_parsed_items_to_books_dict, 9 | ) 10 | from kindle2notion.reading import read_raw_clippings 11 | 12 | 13 | def test_parse_raw_clippings_text_should_return_a_dict_with_all_the_parsed_information(): 14 | # Given 15 | test_clippings_file_path = ( 16 | Path(__file__).parent.absolute() / "test_data/Test Clippings.txt" 17 | ) 18 | raw_clippings_text = read_raw_clippings(test_clippings_file_path) 19 | 20 | expected = { 21 | "Title 1: A Great Book": { 22 | "author": "Ben Horowitz", 23 | "highlights": [ 24 | ( 25 | "This is test highlight 1.", 26 | "11", 27 | "111-114", 28 | "Tuesday, 22 September 2020 09:23:48 AM", 29 | ), 30 | ( 31 | "This is test highlight 2.", 32 | "11", 33 | "111-114", 34 | "Tuesday, 22 September 2020 09:24:04 AM", 35 | ), 36 | ], 37 | }, 38 | "Title 2 Is Good Too": { 39 | "author": "Colin Bryar", 40 | "highlights": [ 41 | ( 42 | "This is test highlight 3.", 43 | "3", 44 | "184-185", 45 | "Friday, 30 April 2021 12:31:29 AM", 46 | ), 47 | ( 48 | "This is test highlight 4.", 49 | "34", 50 | "682-684", 51 | "Friday, 30 April 2021 03:14:33 PM", 52 | ), 53 | ], 54 | }, 55 | "Title 3 Is Clean (Robert C. 
Martin Series)": { 56 | "author": "Martin Robert C.", 57 | "highlights": [ 58 | ( 59 | "This is test highlight 5.", 60 | "22", 61 | "559-560", 62 | "Saturday, 15 May 2021 10:25:42 PM", 63 | ), 64 | ( 65 | "This is test highlight 6.", 66 | "22", 67 | "564-565", 68 | "Saturday, 15 May 2021 10:26:26 PM", 69 | ), 70 | ], 71 | }, 72 | } 73 | 74 | # When 75 | actual = parse_raw_clippings_text(raw_clippings_text) 76 | 77 | # Then 78 | assert expected == actual 79 | 80 | 81 | def test_parse_author_and_title_case_should_parse_the_author_and_title_when_the_author_name_is_formatted_with_a_comma(): 82 | # Given 83 | raw_clipping_list = [ 84 | "Relativity (Einstein, Albert)", 85 | "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM", 86 | "", 87 | "This is a test highlight.", 88 | ] 89 | expected = ("Albert Einstein", "Relativity") 90 | 91 | # When 92 | actual = _parse_author_and_title(raw_clipping_list) 93 | 94 | # Then 95 | assert expected == actual 96 | 97 | 98 | def test_parse_author_and_title_case_should_parse_the_author_and_title_when_the_author_name_is_first_name_last_name(): 99 | # Given 100 | raw_clipping_list = [ 101 | "Relativity (Albert Einstein)", 102 | "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM", 103 | "", 104 | "This is a test highlight.", 105 | ] 106 | expected = ("Albert Einstein", "Relativity") 107 | 108 | # When 109 | actual = _parse_author_and_title(raw_clipping_list) 110 | 111 | # Then 112 | assert expected == actual 113 | 114 | 115 | def test_parse_author_and_title_case_should_parse_the_author_and_title_when_there_are_parentheses_in_the_author_name(): 116 | # Given 117 | raw_clipping_list = [ 118 | "Candide (Voltaire (François-Marie Arouet))", 119 | "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM", 120 | "", 121 | "This is a test highlight.", 122 | ] 123 | expected = ("Voltaire (François-Marie Arouet)", "Candide") 124 | 125 | # When 126 | actual = _parse_author_and_title(raw_clipping_list) 127 | 128 | # Then 129 | assert expected == actual 130 | 131 | 132 | def test_parse_author_and_title_case_should_parse_the_author_and_title_when_there_is_a_The_at_the_end_of_the_title(): 133 | # Given 134 | raw_clipping_list = [ 135 | "Age of Louis XIV, The (Voltaire (François-Marie Arouet))", 136 | "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM", 137 | "", 138 | "This is a test highlight.", 139 | ] 140 | expected = ("Voltaire (François-Marie Arouet)", "The Age of Louis XIV") 141 | 142 | # When 143 | actual = _parse_author_and_title(raw_clipping_list) 144 | 145 | # Then 146 | assert expected == actual 147 | 148 | 149 | def test_parse_author_and_title_case_should_parse_the_author_and_title_when_there_are_parentheses_in_the_title(): 150 | # Given 151 | raw_clipping_list = [ 152 | "The Mysterious Disappearance of Leon (I Mean Noel) (Ellen Raskin)", 153 | "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM", 154 | "", 155 | "This is a test highlight.", 156 | ] 157 | expected = ("Ellen Raskin", "The Mysterious Disappearance of Leon (I Mean Noel)") 158 | 159 | # When 160 | actual = _parse_author_and_title(raw_clipping_list) 161 | 162 | # Then 163 | assert expected == actual 164 | 165 | 166 | def test_parse_page_location_date_and_note_should_parse_the_page_location_and_date_when_there_are_all_three(): 167 | # Given 168 | raw_clipping_list = [ 169 | "Relativity (Albert Einstein)", 170 
| "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM", 171 | "", 172 | "This is a test highlight.", 173 | ] 174 | expected = ("3", "184-185", "Friday, 30 April 2021 12:31:29 AM", False) 175 | 176 | # When 177 | actual = _parse_page_location_date_and_note(raw_clipping_list) 178 | 179 | # Then 180 | assert expected == actual 181 | 182 | 183 | def test_parse_page_location_date_and_note_should_parse_the_page_and_location_when_there_is_no_date(): 184 | # Given 185 | raw_clipping_list = [ 186 | "Relativity (Albert Einstein)", 187 | "- Your Highlight on page 3 | Location 184-185", 188 | "", 189 | "This is a test highlight.", 190 | ] 191 | expected = ("3", "184-185", "", False) 192 | 193 | # When 194 | actual = _parse_page_location_date_and_note(raw_clipping_list) 195 | 196 | # Then 197 | assert expected == actual 198 | 199 | 200 | def test_parse_page_location_date_and_note_should_parse_the_location_and_date_when_there_is_no_page(): 201 | # Given 202 | raw_clipping_list = [ 203 | "Relativity (Albert Einstein)", 204 | "Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM", 205 | "", 206 | "This is a test highlight.", 207 | ] 208 | expected = ("", "184-185", "Friday, 30 April 2021 12:31:29 AM", False) 209 | 210 | # When 211 | actual = _parse_page_location_date_and_note(raw_clipping_list) 212 | 213 | # Then 214 | assert expected == actual 215 | 216 | 217 | def test_parse_page_location_date_and_note_should_parse_the_page_and_date_when_there_is_no_location(): 218 | # Given 219 | raw_clipping_list = [ 220 | "Relativity (Albert Einstein)", 221 | "- Your Highlight on page 3 | Added on Friday, April 30, 2021 12:31:29 AM", 222 | "", 223 | "This is a test highlight.", 224 | ] 225 | expected = ("3", "", "Friday, 30 April 2021 12:31:29 AM", False) 226 | 227 | # When 228 | actual = _parse_page_location_date_and_note(raw_clipping_list) 229 | 230 | # Then 231 | assert expected == actual 232 | 233 | 234 | def test_add_parsed_items_to_books_dict_should_add_the_parsed_items_when_the_book_is_not_already_in_the_books_dict(): 235 | # Given 236 | books = {} 237 | title = "Relativity" 238 | author = "Albert Einstein" 239 | highlight = "This is a first highlight." 240 | page = "1" 241 | location = "100" 242 | date = datetime(2021, 4, 30, 0, 31, 29) 243 | 244 | expected = { 245 | "Relativity": { 246 | "author": "Albert Einstein", 247 | "highlights": [ 248 | ( 249 | "This is a first highlight.", 250 | "1", 251 | "100", 252 | datetime(2021, 4, 30, 0, 31, 29), 253 | ) 254 | ], 255 | } 256 | } 257 | 258 | # When 259 | actual = _add_parsed_items_to_books_dict( 260 | books, title, author, highlight, page, location, date 261 | ) 262 | 263 | # Then 264 | assert expected == actual 265 | 266 | 267 | def test_add_parsed_items_to_books_dict_should_add_the_parsed_items_when_the_book_is_already_in_the_books_dict(): 268 | # Given 269 | books = { 270 | "Relativity": { 271 | "author": "Albert Einstein", 272 | "highlights": [ 273 | ( 274 | "This is a first highlight.", 275 | "1", 276 | "100", 277 | datetime(2021, 4, 30, 0, 31, 29), 278 | ) 279 | ], 280 | } 281 | } 282 | title = "Relativity" 283 | author = "Albert Einstein" 284 | highlight = "This is a second highlight." 
285 | page = "2" 286 | location = "200" 287 | date = datetime(2021, 5, 1, 0, 31, 29) 288 | 289 | expected = { 290 | "Relativity": { 291 | "author": "Albert Einstein", 292 | "highlights": [ 293 | ( 294 | "This is a first highlight.", 295 | "1", 296 | "100", 297 | datetime(2021, 4, 30, 0, 31, 29), 298 | ), 299 | ( 300 | "This is a second highlight.", 301 | "2", 302 | "200", 303 | datetime(2021, 5, 1, 0, 31, 29), 304 | ), 305 | ], 306 | } 307 | } 308 | 309 | # When 310 | actual = _add_parsed_items_to_books_dict( 311 | books, title, author, highlight, page, location, date 312 | ) 313 | 314 | # Then 315 | assert expected == actual 316 | --------------------------------------------------------------------------------
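For reference, the modules dumped above compose into a very small pipeline: `read_raw_clippings` loads and normalizes the clippings file, `parse_raw_clippings_text` groups the highlights and notes by book, and `export_to_notion` writes each book's highlights into the Notion database. Below is a minimal sketch of driving that pipeline from Python rather than through the Click CLI, using only the function signatures defined in `kindle2notion/reading.py`, `kindle2notion/parsing.py`, and `kindle2notion/exporting.py`; the token, database ID, and clippings path are hypothetical placeholders.

```python
# Minimal sketch: run the kindle2notion pipeline programmatically.
# The three values below are hypothetical placeholders - substitute your own.
from pathlib import Path

from kindle2notion.exporting import export_to_notion
from kindle2notion.parsing import parse_raw_clippings_text
from kindle2notion.reading import read_raw_clippings

NOTION_TOKEN = "your_notion_token"         # placeholder authentication token
NOTION_TABLE_ID = "your_notion_table_id"   # placeholder database ID
CLIPPINGS_FILE = Path("My Clippings.txt")  # placeholder path to the Kindle clippings file

raw_text = read_raw_clippings(CLIPPINGS_FILE)  # read and unicode-normalize the raw clippings
books = parse_raw_clippings_text(raw_text)     # {title: {"author": ..., "highlights": [...]}}
export_to_notion(
    books,
    enable_highlight_date=True,  # include "Date Added" next to each highlight
    enable_book_cover=True,
    notion_token=NOTION_TOKEN,
    notion_table_id=NOTION_TABLE_ID,
)
```

This mirrors what `kindle2notion/__main__.py` does after its initial database lookup, so behaviour should match the CLI invocation shown in the README.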