├── .gitignore
├── LICENSE
├── MANIFEST.in
├── Pipfile
├── Pipfile.lock
├── README.md
├── kindle2notion
├── __init__.py
├── __main__.py
├── exporting.py
├── parsing.py
└── reading.py
├── pyproject.toml
├── requirements-dev.txt
├── requirements.txt
├── setup.cfg
├── setup.py
└── tests
├── test_data
└── Test Clippings.txt
├── test_exporting.py
└── test_parsing.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/python
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
3 |
4 | ### Python ###
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 | pytestdebug.log
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 | doc/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # poetry
99 | #poetry.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | # .env
113 | .env
114 | .venv/
115 | env/
116 | venv/
117 | ENV/
118 | env.bak/
119 | venv.bak/
120 | pythonenv*
121 |
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 |
126 | # Rope project settings
127 | .ropeproject
128 |
129 | # mkdocs documentation
130 | /site
131 |
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 |
137 | # Pyre type checker
138 | .pyre/
139 |
140 | # pytype static type analyzer
141 | .pytype/
142 |
143 | # operating system-related files
144 | # file properties cache/storage on macOS
145 | *.DS_Store
146 | # thumbnail cache on Windows
147 | Thumbs.db
148 |
149 | # profiling data
150 | .prof
151 |
152 |
153 | # End of https://www.toptal.com/developers/gitignore/api/python
154 |
155 |
156 | # IDE
157 | .vscode/
158 | # custom project files
159 | MyClippings.txt
160 | dist/
161 | images/
162 | .env.local
163 | My Clipping.txt
164 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Jeffrey Jacob
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include requirements-dev.txt
3 |
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | click = ">=8.0.0"
8 | dateparser = ">=1.0.0"
9 | python-decouple = ">=3.3"
10 | requests = ">=2.25.0"
11 | pathlib = "*"
12 | notion-client = "*"
13 | DateTime = ">=4.3"
14 | kindle2notion = {editable = true, path = "."}
15 |
16 | [dev-packages]
17 |
18 | [requires]
19 | python_version = "3.9"
20 |
--------------------------------------------------------------------------------
/Pipfile.lock:
--------------------------------------------------------------------------------
1 | {
2 | "_meta": {
3 | "hash": {
4 | "sha256": "16640129d2e9de5bc0a42f71b6d3ab743e312c7ce6bd71406422cc5b6a22087c"
5 | },
6 | "pipfile-spec": 6,
7 | "requires": {
8 | "python_version": "3.9"
9 | },
10 | "sources": [
11 | {
12 | "name": "pypi",
13 | "url": "https://pypi.org/simple",
14 | "verify_ssl": true
15 | }
16 | ]
17 | },
18 | "default": {
19 | "anyio": {
20 | "hashes": [
21 | "sha256:a0aeffe2fb1fdf374a8e4b471444f0f3ac4fb9f5a5b542b48824475e0042a5a6",
22 | "sha256:b5fa16c5ff93fa1046f2eeb5bbff2dad4d3514d6cda61d02816dba34fa8c3c2e"
23 | ],
24 | "markers": "python_full_version >= '3.6.2'",
25 | "version": "==3.5.0"
26 | },
27 | "certifi": {
28 | "hashes": [
29 | "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
30 | "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
31 | ],
32 | "version": "==2021.10.8"
33 | },
34 | "charset-normalizer": {
35 | "hashes": [
36 | "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597",
37 | "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"
38 | ],
39 | "markers": "python_version >= '3'",
40 | "version": "==2.0.12"
41 | },
42 | "click": {
43 | "hashes": [
44 | "sha256:5e0d195c2067da3136efb897449ec1e9e6c98282fbf30d7f9e164af9be901a6b",
45 | "sha256:7ab900e38149c9872376e8f9b5986ddcaf68c0f413cf73678a0bca5547e6f976"
46 | ],
47 | "index": "pypi",
48 | "version": "==8.1.1"
49 | },
50 | "dateparser": {
51 | "hashes": [
52 | "sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9",
53 | "sha256:9600874312ff28a41f96ec7ccdc73be1d1c44435719da47fea3339d55ff5a628"
54 | ],
55 | "index": "pypi",
56 | "version": "==1.1.1"
57 | },
58 | "datetime": {
59 | "hashes": [
60 | "sha256:074b0c63d4328f4de30662786fa436a0567145b2ab02f47be5af68f58afc03ec",
61 | "sha256:4fcca115ddb466a1104df08d5c2a2f5805d14ca317800e1bfcea8f3d69f66e57"
62 | ],
63 | "index": "pypi",
64 | "version": "==4.4"
65 | },
66 | "h11": {
67 | "hashes": [
68 | "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6",
69 | "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042"
70 | ],
71 | "markers": "python_version >= '3.6'",
72 | "version": "==0.12.0"
73 | },
74 | "httpcore": {
75 | "hashes": [
76 | "sha256:47d772f754359e56dd9d892d9593b6f9870a37aeb8ba51e9a88b09b3d68cfade",
77 | "sha256:7503ec1c0f559066e7e39bc4003fd2ce023d01cf51793e3c173b864eb456ead1"
78 | ],
79 | "markers": "python_version >= '3.6'",
80 | "version": "==0.14.7"
81 | },
82 | "httpx": {
83 | "hashes": [
84 | "sha256:d8e778f76d9bbd46af49e7f062467e3157a5a3d2ae4876a4bbfd8a51ed9c9cb4",
85 | "sha256:e35e83d1d2b9b2a609ef367cc4c1e66fd80b750348b20cc9e19d1952fc2ca3f6"
86 | ],
87 | "markers": "python_version >= '3.6'",
88 | "version": "==0.22.0"
89 | },
90 | "idna": {
91 | "hashes": [
92 | "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
93 | "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
94 | ],
95 | "version": "==3.3"
96 | },
97 | "kindle2notion": {
98 | "editable": true,
99 | "path": "."
100 | },
101 | "notion-client": {
102 | "hashes": [
103 | "sha256:5a11b5fdbf00210ff256c507a7b0c7aa2842c076b22636dbb779ed9240e40e0d",
104 | "sha256:7438848aa0de81bf2183524ba5cc6caf5b9cba84e1da5051c822ecb31de89b00"
105 | ],
106 | "index": "pypi",
107 | "version": "==0.9.0"
108 | },
109 | "pathlib": {
110 | "hashes": [
111 | "sha256:6940718dfc3eff4258203ad5021090933e5c04707d5ca8cc9e73c94a7894ea9f"
112 | ],
113 | "index": "pypi",
114 | "version": "==1.0.1"
115 | },
116 | "python-dateutil": {
117 | "hashes": [
118 | "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
119 | "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"
120 | ],
121 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
122 | "version": "==2.8.2"
123 | },
124 | "python-decouple": {
125 | "hashes": [
126 | "sha256:2838cdf77a5cf127d7e8b339ce14c25bceb3af3e674e039d4901ba16359968c7",
127 | "sha256:6cf502dc963a5c642ea5ead069847df3d916a6420cad5599185de6bab11d8c2e"
128 | ],
129 | "index": "pypi",
130 | "version": "==3.6"
131 | },
132 | "pytz": {
133 | "hashes": [
134 | "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
135 | "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"
136 | ],
137 | "version": "==2022.1"
138 | },
139 | "pytz-deprecation-shim": {
140 | "hashes": [
141 | "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6",
142 | "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d"
143 | ],
144 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
145 | "version": "==0.1.0.post0"
146 | },
147 | "regex": {
148 | "hashes": [
149 | "sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14",
150 | "sha256:03299b0bcaa7824eb7c0ebd7ef1e3663302d1b533653bfe9dc7e595d453e2ae9",
151 | "sha256:06b1df01cf2aef3a9790858af524ae2588762c8a90e784ba00d003f045306204",
152 | "sha256:09b4b6ccc61d4119342b26246ddd5a04accdeebe36bdfe865ad87a0784efd77f",
153 | "sha256:0be0c34a39e5d04a62fd5342f0886d0e57592a4f4993b3f9d257c1f688b19737",
154 | "sha256:0d96eec8550fd2fd26f8e675f6d8b61b159482ad8ffa26991b894ed5ee19038b",
155 | "sha256:0eb0e2845e81bdea92b8281a3969632686502565abf4a0b9e4ab1471c863d8f3",
156 | "sha256:13bbf0c9453c6d16e5867bda7f6c0c7cff1decf96c5498318bb87f8136d2abd4",
157 | "sha256:17e51ad1e6131c496b58d317bc9abec71f44eb1957d32629d06013a21bc99cac",
158 | "sha256:1977bb64264815d3ef016625adc9df90e6d0e27e76260280c63eca993e3f455f",
159 | "sha256:1e30762ddddb22f7f14c4f59c34d3addabc789216d813b0f3e2788d7bcf0cf29",
160 | "sha256:1e73652057473ad3e6934944af090852a02590c349357b79182c1b681da2c772",
161 | "sha256:20e6a27959f162f979165e496add0d7d56d7038237092d1aba20b46de79158f1",
162 | "sha256:286ff9ec2709d56ae7517040be0d6c502642517ce9937ab6d89b1e7d0904f863",
163 | "sha256:297c42ede2c81f0cb6f34ea60b5cf6dc965d97fa6936c11fc3286019231f0d66",
164 | "sha256:320c2f4106962ecea0f33d8d31b985d3c185757c49c1fb735501515f963715ed",
165 | "sha256:35ed2f3c918a00b109157428abfc4e8d1ffabc37c8f9abc5939ebd1e95dabc47",
166 | "sha256:3d146e5591cb67c5e836229a04723a30af795ef9b70a0bbd913572e14b7b940f",
167 | "sha256:42bb37e2b2d25d958c25903f6125a41aaaa1ed49ca62c103331f24b8a459142f",
168 | "sha256:42d6007722d46bd2c95cce700181570b56edc0dcbadbfe7855ec26c3f2d7e008",
169 | "sha256:43eba5c46208deedec833663201752e865feddc840433285fbadee07b84b464d",
170 | "sha256:452519bc4c973e961b1620c815ea6dd8944a12d68e71002be5a7aff0a8361571",
171 | "sha256:4b9c16a807b17b17c4fa3a1d8c242467237be67ba92ad24ff51425329e7ae3d0",
172 | "sha256:5510932596a0f33399b7fff1bd61c59c977f2b8ee987b36539ba97eb3513584a",
173 | "sha256:55820bc631684172b9b56a991d217ec7c2e580d956591dc2144985113980f5a3",
174 | "sha256:57484d39447f94967e83e56db1b1108c68918c44ab519b8ecfc34b790ca52bf7",
175 | "sha256:58ba41e462653eaf68fc4a84ec4d350b26a98d030be1ab24aba1adcc78ffe447",
176 | "sha256:5bc5f921be39ccb65fdda741e04b2555917a4bced24b4df14eddc7569be3b493",
177 | "sha256:5dcc4168536c8f68654f014a3db49b6b4a26b226f735708be2054314ed4964f4",
178 | "sha256:5f92a7cdc6a0ae2abd184e8dfd6ef2279989d24c85d2c85d0423206284103ede",
179 | "sha256:67250b36edfa714ba62dc62d3f238e86db1065fccb538278804790f578253640",
180 | "sha256:6df070a986fc064d865c381aecf0aaff914178fdf6874da2f2387e82d93cc5bd",
181 | "sha256:729aa8ca624c42f309397c5fc9e21db90bf7e2fdd872461aabdbada33de9063c",
182 | "sha256:72bc3a5effa5974be6d965ed8301ac1e869bc18425c8a8fac179fbe7876e3aee",
183 | "sha256:74d86e8924835f863c34e646392ef39039405f6ce52956d8af16497af4064a30",
184 | "sha256:79e5af1ff258bc0fe0bdd6f69bc4ae33935a898e3cbefbbccf22e88a27fa053b",
185 | "sha256:7b103dffb9f6a47ed7ffdf352b78cfe058b1777617371226c1894e1be443afec",
186 | "sha256:83f03f0bd88c12e63ca2d024adeee75234d69808b341e88343b0232329e1f1a1",
187 | "sha256:86d7a68fa53688e1f612c3246044157117403c7ce19ebab7d02daf45bd63913e",
188 | "sha256:878c626cbca3b649e14e972c14539a01191d79e58934e3f3ef4a9e17f90277f8",
189 | "sha256:878f5d649ba1db9f52cc4ef491f7dba2d061cdc48dd444c54260eebc0b1729b9",
190 | "sha256:87bc01226cd288f0bd9a4f9f07bf6827134dc97a96c22e2d28628e824c8de231",
191 | "sha256:8babb2b5751105dc0aef2a2e539f4ba391e738c62038d8cb331c710f6b0f3da7",
192 | "sha256:91e0f7e7be77250b808a5f46d90bf0032527d3c032b2131b63dee54753a4d729",
193 | "sha256:9557545c10d52c845f270b665b52a6a972884725aa5cf12777374e18f2ea8960",
194 | "sha256:9ccb0a4ab926016867260c24c192d9df9586e834f5db83dfa2c8fffb3a6e5056",
195 | "sha256:9d828c5987d543d052b53c579a01a52d96b86f937b1777bbfe11ef2728929357",
196 | "sha256:9efa41d1527b366c88f265a227b20bcec65bda879962e3fc8a2aee11e81266d7",
197 | "sha256:aaf5317c961d93c1a200b9370fb1c6b6836cc7144fef3e5a951326912bf1f5a3",
198 | "sha256:ab69b4fe09e296261377d209068d52402fb85ef89dc78a9ac4a29a895f4e24a7",
199 | "sha256:ad397bc7d51d69cb07ef89e44243f971a04ce1dca9bf24c992c362406c0c6573",
200 | "sha256:ae17fc8103f3b63345709d3e9654a274eee1c6072592aec32b026efd401931d0",
201 | "sha256:af4d8cc28e4c7a2f6a9fed544228c567340f8258b6d7ea815b62a72817bbd178",
202 | "sha256:b22ff939a8856a44f4822da38ef4868bd3a9ade22bb6d9062b36957c850e404f",
203 | "sha256:b549d851f91a4efb3e65498bd4249b1447ab6035a9972f7fc215eb1f59328834",
204 | "sha256:be319f4eb400ee567b722e9ea63d5b2bb31464e3cf1b016502e3ee2de4f86f5c",
205 | "sha256:c0446b2871335d5a5e9fcf1462f954586b09a845832263db95059dcd01442015",
206 | "sha256:c68d2c04f7701a418ec2e5631b7f3552efc32f6bcc1739369c6eeb1af55f62e0",
207 | "sha256:c87ac58b9baaf50b6c1b81a18d20eda7e2883aa9a4fb4f1ca70f2e443bfcdc57",
208 | "sha256:caa2734ada16a44ae57b229d45091f06e30a9a52ace76d7574546ab23008c635",
209 | "sha256:cb34c2d66355fb70ae47b5595aafd7218e59bb9c00ad8cc3abd1406ca5874f07",
210 | "sha256:cb3652bbe6720786b9137862205986f3ae54a09dec8499a995ed58292bdf77c2",
211 | "sha256:cf668f26604e9f7aee9f8eaae4ca07a948168af90b96be97a4b7fa902a6d2ac1",
212 | "sha256:d326ff80ed531bf2507cba93011c30fff2dd51454c85f55df0f59f2030b1687b",
213 | "sha256:d6c2441538e4fadd4291c8420853431a229fcbefc1bf521810fbc2629d8ae8c2",
214 | "sha256:d6ecfd1970b3380a569d7b3ecc5dd70dba295897418ed9e31ec3c16a5ab099a5",
215 | "sha256:e5602a9b5074dcacc113bba4d2f011d2748f50e3201c8139ac5b68cf2a76bd8b",
216 | "sha256:ef806f684f17dbd6263d72a54ad4073af42b42effa3eb42b877e750c24c76f86",
217 | "sha256:f3356afbb301ec34a500b8ba8b47cba0b44ed4641c306e1dd981a08b416170b5",
218 | "sha256:f6f7ee2289176cb1d2c59a24f50900f8b9580259fa9f1a739432242e7d254f93",
219 | "sha256:f7e8f1ee28e0a05831c92dc1c0c1c94af5289963b7cf09eca5b5e3ce4f8c91b0",
220 | "sha256:f8169ec628880bdbca67082a9196e2106060a4a5cbd486ac51881a4df805a36f",
221 | "sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d",
222 | "sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4"
223 | ],
224 | "markers": "python_version >= '3.6'",
225 | "version": "==2022.3.2"
226 | },
227 | "requests": {
228 | "hashes": [
229 | "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
230 | "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
231 | ],
232 | "index": "pypi",
233 | "version": "==2.27.1"
234 | },
235 | "rfc3986": {
236 | "extras": [
237 | "idna2008"
238 | ],
239 | "hashes": [
240 | "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835",
241 | "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"
242 | ],
243 | "version": "==1.5.0"
244 | },
245 | "six": {
246 | "hashes": [
247 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
248 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
249 | ],
250 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
251 | "version": "==1.16.0"
252 | },
253 | "sniffio": {
254 | "hashes": [
255 | "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663",
256 | "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"
257 | ],
258 | "markers": "python_version >= '3.5'",
259 | "version": "==1.2.0"
260 | },
261 | "tzdata": {
262 | "hashes": [
263 | "sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9",
264 | "sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3"
265 | ],
266 | "markers": "python_version >= '3.6'",
267 | "version": "==2022.1"
268 | },
269 | "tzlocal": {
270 | "hashes": [
271 | "sha256:0f28015ac68a5c067210400a9197fc5d36ba9bc3f8eaf1da3cbd59acdfed9e09",
272 | "sha256:28ba8d9fcb6c9a782d6e0078b4f6627af1ea26aeaa32b4eab5324abc7df4149f"
273 | ],
274 | "markers": "python_version >= '3.6'",
275 | "version": "==4.1"
276 | },
277 | "urllib3": {
278 | "hashes": [
279 | "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
280 | "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
281 | ],
282 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
283 | "version": "==1.26.9"
284 | },
285 | "zope.interface": {
286 | "hashes": [
287 | "sha256:08f9636e99a9d5410181ba0729e0408d3d8748026ea938f3b970a0249daa8192",
288 | "sha256:0b465ae0962d49c68aa9733ba92a001b2a0933c317780435f00be7ecb959c702",
289 | "sha256:0cba8477e300d64a11a9789ed40ee8932b59f9ee05f85276dbb4b59acee5dd09",
290 | "sha256:0cee5187b60ed26d56eb2960136288ce91bcf61e2a9405660d271d1f122a69a4",
291 | "sha256:0ea1d73b7c9dcbc5080bb8aaffb776f1c68e807767069b9ccdd06f27a161914a",
292 | "sha256:0f91b5b948686659a8e28b728ff5e74b1be6bf40cb04704453617e5f1e945ef3",
293 | "sha256:15e7d1f7a6ee16572e21e3576d2012b2778cbacf75eb4b7400be37455f5ca8bf",
294 | "sha256:17776ecd3a1fdd2b2cd5373e5ef8b307162f581c693575ec62e7c5399d80794c",
295 | "sha256:194d0bcb1374ac3e1e023961610dc8f2c78a0f5f634d0c737691e215569e640d",
296 | "sha256:1c0e316c9add0db48a5b703833881351444398b04111188069a26a61cfb4df78",
297 | "sha256:205e40ccde0f37496904572035deea747390a8b7dc65146d30b96e2dd1359a83",
298 | "sha256:273f158fabc5ea33cbc936da0ab3d4ba80ede5351babc4f577d768e057651531",
299 | "sha256:2876246527c91e101184f63ccd1d716ec9c46519cc5f3d5375a3351c46467c46",
300 | "sha256:2c98384b254b37ce50eddd55db8d381a5c53b4c10ee66e1e7fe749824f894021",
301 | "sha256:2e5a26f16503be6c826abca904e45f1a44ff275fdb7e9d1b75c10671c26f8b94",
302 | "sha256:334701327f37c47fa628fc8b8d28c7d7730ce7daaf4bda1efb741679c2b087fc",
303 | "sha256:3748fac0d0f6a304e674955ab1365d515993b3a0a865e16a11ec9d86fb307f63",
304 | "sha256:3c02411a3b62668200910090a0dff17c0b25aaa36145082a5a6adf08fa281e54",
305 | "sha256:3dd4952748521205697bc2802e4afac5ed4b02909bb799ba1fe239f77fd4e117",
306 | "sha256:3f24df7124c323fceb53ff6168da70dbfbae1442b4f3da439cd441681f54fe25",
307 | "sha256:469e2407e0fe9880ac690a3666f03eb4c3c444411a5a5fddfdabc5d184a79f05",
308 | "sha256:4de4bc9b6d35c5af65b454d3e9bc98c50eb3960d5a3762c9438df57427134b8e",
309 | "sha256:5208ebd5152e040640518a77827bdfcc73773a15a33d6644015b763b9c9febc1",
310 | "sha256:52de7fc6c21b419078008f697fd4103dbc763288b1406b4562554bd47514c004",
311 | "sha256:5bb3489b4558e49ad2c5118137cfeaf59434f9737fa9c5deefc72d22c23822e2",
312 | "sha256:5dba5f530fec3f0988d83b78cc591b58c0b6eb8431a85edd1569a0539a8a5a0e",
313 | "sha256:5dd9ca406499444f4c8299f803d4a14edf7890ecc595c8b1c7115c2342cadc5f",
314 | "sha256:5f931a1c21dfa7a9c573ec1f50a31135ccce84e32507c54e1ea404894c5eb96f",
315 | "sha256:63b82bb63de7c821428d513607e84c6d97d58afd1fe2eb645030bdc185440120",
316 | "sha256:66c0061c91b3b9cf542131148ef7ecbecb2690d48d1612ec386de9d36766058f",
317 | "sha256:6f0c02cbb9691b7c91d5009108f975f8ffeab5dff8f26d62e21c493060eff2a1",
318 | "sha256:71aace0c42d53abe6fc7f726c5d3b60d90f3c5c055a447950ad6ea9cec2e37d9",
319 | "sha256:7d97a4306898b05404a0dcdc32d9709b7d8832c0c542b861d9a826301719794e",
320 | "sha256:7df1e1c05304f26faa49fa752a8c690126cf98b40b91d54e6e9cc3b7d6ffe8b7",
321 | "sha256:8270252effc60b9642b423189a2fe90eb6b59e87cbee54549db3f5562ff8d1b8",
322 | "sha256:867a5ad16892bf20e6c4ea2aab1971f45645ff3102ad29bd84c86027fa99997b",
323 | "sha256:877473e675fdcc113c138813a5dd440da0769a2d81f4d86614e5d62b69497155",
324 | "sha256:8892f89999ffd992208754851e5a052f6b5db70a1e3f7d54b17c5211e37a98c7",
325 | "sha256:9a9845c4c6bb56e508651f005c4aeb0404e518c6f000d5a1123ab077ab769f5c",
326 | "sha256:a1e6e96217a0f72e2b8629e271e1b280c6fa3fe6e59fa8f6701bec14e3354325",
327 | "sha256:a8156e6a7f5e2a0ff0c5b21d6bcb45145efece1909efcbbbf48c56f8da68221d",
328 | "sha256:a9506a7e80bcf6eacfff7f804c0ad5350c8c95b9010e4356a4b36f5322f09abb",
329 | "sha256:af310ec8335016b5e52cae60cda4a4f2a60a788cbb949a4fbea13d441aa5a09e",
330 | "sha256:b0297b1e05fd128d26cc2460c810d42e205d16d76799526dfa8c8ccd50e74959",
331 | "sha256:bf68f4b2b6683e52bec69273562df15af352e5ed25d1b6641e7efddc5951d1a7",
332 | "sha256:d0c1bc2fa9a7285719e5678584f6b92572a5b639d0e471bb8d4b650a1a910920",
333 | "sha256:d4d9d6c1a455d4babd320203b918ccc7fcbefe308615c521062bc2ba1aa4d26e",
334 | "sha256:db1fa631737dab9fa0b37f3979d8d2631e348c3b4e8325d6873c2541d0ae5a48",
335 | "sha256:dd93ea5c0c7f3e25335ab7d22a507b1dc43976e1345508f845efc573d3d779d8",
336 | "sha256:f44e517131a98f7a76696a7b21b164bcb85291cee106a23beccce454e1f433a4",
337 | "sha256:f7ee479e96f7ee350db1cf24afa5685a5899e2b34992fb99e1f7c1b0b758d263"
338 | ],
339 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
340 | "version": "==5.4.0"
341 | }
342 | },
343 | "develop": {}
344 | }
345 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | A program to copy all your Kindle highlights and notes to a page in Notion.
9 |
10 | Explore the docs
11 | ·
12 | File issues and feature requests here
13 |
14 |
15 | If you found this script helpful or appreciate my work, you can support me here:
16 |
17 |
18 |
19 |
20 |
21 | [![Contributors][contributors-shield]][contributors-url]
22 | [![Forks][forks-shield]][forks-url]
23 | [![Stargazers][stars-shield]][stars-url]
24 | [![Issues][issues-shield]][issues-url]
25 | [![MIT License][license-shield]][license-url]
26 | [![LinkedIn][linkedin-shield]][linkedin-url]
27 |
28 |
29 | ## Table of Contents
30 |
31 | - [Table of Contents](#table-of-contents)
32 | - [About The Project](#about-the-project)
33 | - [Getting Started](#getting-started)
34 | - [Prerequisites](#prerequisites)
35 | - [Installation](#installation)
36 | - [Usage](#usage)
37 | - [Roadmap](#roadmap)
38 | - [Contributing](#contributing)
39 | - [License](#license)
40 | - [Contact](#contact)
41 |
42 |
43 |
44 |
45 | ## About The Project
46 |
47 | ![Kindle2Notion Demo][product-demo]
48 |
49 | A Python package to export all the clippings from your Kindle device to a page in Notion. Run this script whenever you plug in your Kindle device to your PC.
50 |
51 | A key inspiration behind this project was the notes saving feature on Google Play Books, which automatically syncs all your highlights from a book hosted on the service to a Google Doc in real time. I wanted a similar feature for my Kindle and this project is one step towards a solution for this problem.
52 |
53 | **Intended for**
54 | - Avid readers who would want to browse through their prior reads and highlights anytime anywhere.
55 | - For those who take notes alongside their highlights.
56 |
57 |
58 |
59 | ## Getting Started
60 |
61 |
62 | > **NOTE**
63 | > Need a step-by-step guide to setting this package up? Click [here](https://www.notion.so/kindle2notion/Kindle2Notion-8a9683c9b19546c3b1cf42a68aceebee) for the full guide.
64 |
65 | To get a local copy up and running follow these simple steps.
66 |
67 | ### Prerequisites
68 |
69 | * A Kindle device.
70 | * A Notion account to store your highlights and notes.
71 | * Python 3 on your system to run the code.
72 |
73 | ### Installation
74 |
75 | 1. Install the library.
76 | ```sh
77 | pip install kindle2notion
78 | ```
79 | 2. Export your Kindle highlights and notes to Notion!
80 | - On MacOS and UNIX,
81 | ```sh
82 | kindle2notion 'your_notion_token' 'your_notion_table_id' 'your_kindle_clippings_file'
83 | ```
84 | - On Windows
85 | ```sh
86 | python -m kindle2notion 'your_notion_token' 'your_notion_table_id' 'your_kindle_clippings_file'
87 | ```
88 |
89 |
90 |
91 |
92 | ## Usage
93 |
94 | 1. Plug in your Kindle device to your PC.
95 |
96 | 2. Duplicate this [database template](https://www.notion.so/kindle2notion/6d26062e3bb04dd89b988806978c1fe7?v=0d394a8162cc481280966b35a37465c2) to your Notion workspace.
97 |
98 | 3. Find your Notion token. Because this tool needs access to non-public pages, an authentication token from Notion is required. This token is stored in the `token_v2` cookie, which you can find in the *Storage* tab of your browser's developer tools.
99 | - For Chrome: Open Developer Tools (*Menu > Other tools > Developer Tools*), navigate to the Application tab and go to *Storage\Cookies* to find the value listed next to *token_v2*.
100 |
101 | 4. Find your Notion table ID: it's simply the URL of the database you have copied to your workspace.
102 |
103 | 5. You can override some of the default settings with the following CLI options:
104 | - ```--enable_highlight_date``` Set to False if you don't want to see the "Date Added" information in Notion.
105 | - ```--enable_book_cover``` Set to False if you don't want to store the book cover in Notion.
106 |
107 | 6. Export your Kindle highlights and notes to Notion!
108 | - On MacOS and UNIX,
109 | ```sh
110 | kindle2notion 'your_notion_token' 'your_notion_table_id' 'your_kindle_clippings_file'
111 | ```
112 | - On Windows
113 | ```sh
114 | python -m kindle2notion 'your_notion_token' 'your_notion_table_id' 'your_kindle_clippings_file'
115 | ```
116 | You can also get help with either of the following commands:
117 | ```sh
118 | kindle2notion --help
119 | python -m kindle2notion --help
120 | ```
121 |
122 | [**Note:** This code has been tested on a 4th Gen Kindle Paperwhite on both MacOS and Windows.]
123 |
124 |
125 |
126 | ## Roadmap
127 |
128 | See the [open issues](https://github.com/paperboi/Kindle2Notion/issues) for a list of proposed features (and known issues).
129 |
130 |
131 |
132 |
133 | ## Contributing
134 |
135 |
136 | Any contributions you make are **greatly appreciated**.
137 |
138 | 1. Fork the Project
139 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
140 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
141 | 4. Push to the Branch (`git push origin feature/AmazingFeature`)
142 | 5. Open a Pull Request
143 |
144 |
145 |
146 |
147 | ## License
148 |
149 | Distributed under the MIT License. See [LICENSE][license-url] for more information.
150 |
151 |
152 |
153 |
154 | ## Contact
155 |
156 | Jeffrey Jacob ([Twitter](https://twitter.com/jeffreysamjacob) | [Email](mailto:jeffreysamjacob@gmail.com) | [LinkedIn](https://www.linkedin.com/in/jeffreysamjacob/))
157 |
158 |
159 |
160 | [contributors-shield]: https://img.shields.io/github/contributors/paperboi/Kindle2Notion.svg?style=flat-square
161 | [contributors-url]: https://github.com/paperboi/Kindle2Notion/graphs/contributors
162 | [forks-shield]: https://img.shields.io/github/forks/paperboi/Kindle2Notion.svg?style=flat-square
163 | [forks-url]: https://github.com/paperboi/Kindle2Notion/network/members
164 | [stars-shield]: https://img.shields.io/github/stars/paperboi/Kindle2Notion.svg?style=flat-square
165 | [stars-url]: https://github.com/paperboi/Kindle2Notion/stargazers
166 | [issues-shield]: https://img.shields.io/github/issues/paperboi/Kindle2Notion.svg?style=flat-square
167 | [issues-url]: https://github.com/paperboi/Kindle2Notion/issues
168 | [license-shield]: https://img.shields.io/github/license/paperboi/Kindle2Notion.svg?style=flat-square
169 | [license-url]: https://github.com/paperboi/kindle2notion/blob/master/LICENSE
170 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=flat-square&logo=linkedin&colorB=555
171 | [linkedin-url]: https://www.linkedin.com/in/jeffreysamjacob/
172 | [product-demo]: https://i.imgur.com/IlDmEOy.gif
173 |
--------------------------------------------------------------------------------
/kindle2notion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wsine/kindle2notion/b1e54b93c1129b2f287d92449fc24dfde81f1d7d/kindle2notion/__init__.py
--------------------------------------------------------------------------------
/kindle2notion/__main__.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 | from kindle2notion.exporting import export_to_notion
4 | from kindle2notion.parsing import parse_raw_clippings_text
5 | from kindle2notion.reading import read_raw_clippings
6 |
7 | from notion_client import Client
8 |
# Command-line entry point: reads a Kindle clippings file, parses it into
# books, and exports them to the Notion database given on the command line.
# NOTE: documented with comments rather than a docstring on purpose -- click
# would otherwise surface a docstring in the `--help` output.
@click.command()
@click.argument("notion_token")
@click.argument("notion_table_id")
@click.argument("clippings_file_path")
@click.option(
    "--enable_highlight_date",
    default=True,
    help='Set to False if you don\'t want to see the "Date Added" information in Notion.',
)
@click.option(
    "--enable_book_cover",
    default=True,
    help="Set to False if you don't want to store the book cover in Notion.",
)
def main(
    notion_token,
    notion_table_id,
    clippings_file_path,
    enable_highlight_date,
    enable_book_cover,
):
    # Ask the Notion API for the target database before doing any local work.
    database_info = Client(auth=notion_token).databases.retrieve(notion_table_id)

    # Nothing is read or exported when the retrieval result is empty.
    if len(database_info) == 0:
        return

    print("Notion page is found. Analyzing clippings file...")

    # Pipeline: raw clippings text -> parsed books -> export to Notion.
    raw_clippings = read_raw_clippings(clippings_file_path)
    parsed_books = parse_raw_clippings_text(raw_clippings)
    export_to_notion(
        parsed_books,
        enable_highlight_date,
        enable_book_cover,
        notion_token,
        notion_table_id,
    )

    print("Transfer complete... Exiting script...")
46 |
47 |
# Invoke the CLI only when this module is executed directly
# (e.g. `python -m kindle2notion`), not when it is imported.
if __name__ == "__main__":
    main()
50 |
--------------------------------------------------------------------------------
/kindle2notion/exporting.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from typing import Dict, List, Tuple
3 | from dateparser import parse
4 | from dateutil.tz import tzlocal
5 |
6 | from notion_client import Client
7 | from requests import get
8 |
9 | NO_COVER_IMG = "https://via.placeholder.com/150x200?text=No%20Cover"
10 | ITALIC = "*"
11 | BOLD = "**"
12 |
13 | # TODO: Refactor this module
14 |
def export_to_notion(
    books: Dict,
    enable_highlight_date: bool,
    enable_book_cover: bool,
    notion_token: str,
    notion_table_id: str,
) -> None:
    """Push every parsed book and its highlights to the Notion database.

    Args:
        books: Mapping of book title to a dict with "author" and "highlights".
        enable_highlight_date: Include the "Date Added" text for each highlight.
        enable_book_cover: Forwarded unchanged to the per-book export.
        notion_token: Notion integration token.
        notion_table_id: Id of the target Notion database.
    """
    print("Initiating transfer...\n")

    for title, book in books.items():
        print("Checking book: " + title)

        author = book["author"]
        highlights = book["highlights"]
        aggregated_text, last_date = _prepare_aggregated_text_for_one_book(
            highlights, enable_highlight_date
        )
        message = _add_book_to_notion(
            title,
            author,
            len(highlights),
            aggregated_text,
            last_date,
            notion_token,
            notion_table_id,
            enable_book_cover,
        )
        # Books whose highlight count is unchanged are reported silently.
        if message == "None to add":
            continue
        print("✓", message)
47 |
48 | def _prepare_aggregated_text_for_one_book(
49 | highlights: List, enable_highlight_date: bool
50 | ) -> Tuple[str, str]:
51 | aggregated_text = ""
52 | for highlight in highlights:
53 | text = highlight[0]
54 | page = highlight[1]
55 | location = highlight[2]
56 | date = highlight[3]
57 | isNote = highlight[4]
58 | if isNote == True:
59 | aggregated_text += BOLD + "Note: " + BOLD
60 |
61 | aggregated_text += text + "\n("
62 | if page != "":
63 | aggregated_text += "Page: " + page + " "
64 | if location != "":
65 | aggregated_text += "Location: " + location + " "
66 | if enable_highlight_date and (date != ""):
67 | aggregated_text += "Date Added: " + date
68 |
69 | aggregated_text = aggregated_text.strip() + ")\n\n"
70 | last_date = date
71 | return aggregated_text, last_date
72 |
73 |
def _collect_all_results(list_endpoint, target_id: str) -> List:
    """Return the results of every page of a cursor-paginated Notion endpoint."""
    results = []
    cursor = None
    while True:
        # Only send start_cursor once the API has actually handed one out.
        kwargs = {"start_cursor": cursor} if cursor else {}
        response = list_endpoint(target_id, **kwargs)
        results.extend(response.get("results") or [])
        cursor = response.get("next_cursor")
        if not response.get("has_more") or not cursor:
            return results


def _add_book_to_notion(
    title: str,
    author: str,
    highlight_count: int,
    aggregated_text: str,
    last_date: str,
    notion_token: str,
    notion_table_id: str,
    enable_book_cover: bool,
) -> str:
    """Create or refresh the Notion page of one book.

    Looks the book up by title in the database, creates the page when it is
    missing, replaces the page content with ``aggregated_text`` and updates
    the "Highlights" and "Last Synced" properties.

    Args:
        title: Book title, matched against the "Title" property.
        author: Author text stored on newly created pages.
        highlight_count: Number of highlights parsed for this book.
        aggregated_text: Full page content to write.
        last_date: Date of the latest highlight (currently not written).
        notion_token: Notion integration token.
        notion_table_id: Id of the target Notion database.
        enable_book_cover: Accepted for interface compatibility; unused here.

    Returns:
        ``"None to add"`` when the stored highlight count already matches,
        otherwise a human-readable success message.
    """
    notion_client = Client(auth=notion_token)
    notion_books_database = notion_client.databases.retrieve(notion_table_id)
    # Query results are paginated; read every page so books beyond the first
    # page are found instead of being created again.
    notion_books = _collect_all_results(
        notion_client.databases.query, notion_books_database["id"]
    )

    title_exists = False
    row = None
    for c_row in notion_books:
        title_parts = c_row.get("properties")["Title"]["title"]
        if not title_parts:
            # Rows with an empty title cell cannot match any book.
            continue
        # A title may be split into several rich-text segments.
        row_title = "".join(part["plain_text"] for part in title_parts)
        if title == row_title:
            title_exists = True
            row = c_row

            if row["properties"]["Highlights"]["number"] is None:
                row["properties"]["Highlights"]["number"] = 0
            elif row["properties"]["Highlights"]["number"] == highlight_count:
                return "None to add"

    title_and_author = title + " (" + str(author) + ")"
    print(title_and_author)
    print("-" * len(title_and_author))

    if not title_exists:
        new_page = {
            "Title": {"title": [{"text": {"content": title}}]},
            "Author": {
                "type": "rich_text",
                "rich_text": [
                    {
                        "type": "text",
                        "text": {"content": author},
                    }
                ],
            },
            "Highlights": {"type": "number", "number": 0},
        }
        row = notion_client.pages.create(
            parent={"database_id": notion_table_id}, properties=new_page
        )

    parent_page = notion_client.pages.retrieve(row["id"])
    page_id = parent_page["id"]

    # Collect all existing child blocks first (the listing is paginated too),
    # then delete them so the page is rewritten from scratch.
    stale_blocks = _collect_all_results(notion_client.blocks.children.list, page_id)
    for stale_block in stale_blocks:
        notion_client.blocks.delete(stale_block["id"])

    # Split aggregated_text into rich-text chunks below the per-item size limit.
    chunk_size = 1500
    chunks = [
        {"type": "text", "text": {"content": aggregated_text[i : i + chunk_size]}}
        for i in range(0, len(aggregated_text), chunk_size)
    ]

    # Notion caps a rich-text array and one append request at 100 elements,
    # so very long texts are spread over several paragraph blocks / requests.
    max_items = 100
    chunk_groups = [
        chunks[i : i + max_items] for i in range(0, len(chunks), max_items)
    ] or [[]]
    new_blocks = [
        {
            "object": "block",
            "type": "paragraph",
            "paragraph": {
                "text": group,
            },
        }
        for group in chunk_groups
    ]
    for i in range(0, len(new_blocks), max_items):
        notion_client.blocks.children.append(
            block_id=page_id, children=new_blocks[i : i + max_items]
        )

    diff_count = highlight_count - (row["properties"]["Highlights"]["number"] or 0)
    # NOTE: "Last Highlighted" is intentionally not written; last_date is kept
    # in the signature for callers.
    updated_info = {
        "Highlights": {"type": "number", "number": highlight_count},
        "Last Synced": {
            "type": "date",
            "date": {"start": datetime.now(tzlocal()).isoformat()},
        },
    }
    notion_client.pages.update(page_id=row["id"], properties=updated_info)

    message = str(diff_count) + " notes / highlights added successfully\n"
    return message
151 |
--------------------------------------------------------------------------------
/kindle2notion/parsing.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Dict, List, Tuple
3 |
4 | from dateparser import parse
5 |
6 | ACADEMIC_TITLES = [
7 | "A.A.",
8 | "A.S.",
9 | "A.A.A.",
10 | "A.A.S.",
11 | "A.B.",
12 | "A.D.N.",
13 | "A.M.",
14 | "A.M.T.",
15 | "C.E.",
16 | "Ch.E.",
17 | "D.A.",
18 | "D.A.S.",
19 | "D.B.A.",
20 | "D.C.",
21 | "D.D.",
22 | "D.Ed.",
23 | "D.L.S.",
24 | "D.M.D.",
25 | "D.M.S.",
26 | "D.P.A.",
27 | "D.P.H.",
28 | "D.R.E.",
29 | "D.S.W.",
30 | "D.Sc.",
31 | "D.V.M.",
32 | "Ed.D.",
33 | "Ed.S.",
34 | "E.E.",
35 | "E.M.",
36 | "E.Met.",
37 | "I.E.",
38 | "J.D.",
39 | "J.S.D.",
40 | "L.H.D.",
41 | "Litt.B.",
42 | "Litt.M.",
43 | "LL.B.",
44 | "LL.D.",
45 | "LL.M.",
46 | "M.A.",
47 | "M.Aero.E.",
48 | "M.B.A.",
49 | "M.C.S.",
50 | "M.D.",
51 | "M.Div.",
52 | "M.E.",
53 | "M.Ed.",
54 | "M.Eng.",
55 | "M.F.A.",
56 | "M.H.A.",
57 | "M.L.S.",
58 | "M.Mus.",
59 | "M.N.",
60 | "M.P.A.",
61 | "M.S.",
62 | "M.S.Ed.",
63 | "M.S.W.",
64 | "M.Th.",
65 | "Nuc.E.",
66 | "O.D.",
67 | "Pharm.D.",
68 | "Ph.B.",
69 | "Ph.D.",
70 | "S.B.",
71 | "Sc.D.",
72 | "S.J.D.",
73 | "S.Sc.D.",
74 | "Th.B.",
75 | "Th.D.",
76 | "Th.M.",
77 | ]
78 |
79 | DELIMITERS = ["; ", " & ", " and "]
80 |
81 |
def parse_raw_clippings_text(raw_clippings_text: str) -> Dict:
    """Parse the full text of a Kindle clippings file into a books dict.

    Args:
        raw_clippings_text: Entire clippings text; entries are separated by
            lines of ``==========``.

    Returns:
        Mapping of title to ``{"author": str, "highlights": [tuple, ...]}``
        where each tuple is ``(text, page, location, date, is_note)``.
    """
    # The file ends with a separator, so splitting leaves an empty tail;
    # blank segments are dropped so they are neither counted nor "passed".
    segments = (segment.strip() for segment in raw_clippings_text.split("=========="))
    raw_clippings_list = [segment for segment in segments if segment]
    print(f"Found {len(raw_clippings_list)} notes and highlights.\n")

    books = {}
    passed_clippings_count = 0

    for raw_clipping in raw_clippings_list:
        raw_clipping_list = raw_clipping.split("\n")

        if _is_valid_clipping(raw_clipping_list):
            author, title = _parse_author_and_title(raw_clipping_list)
            page, location, date, isNote = _parse_page_location_date_and_note(
                raw_clipping_list
            )
            # Content starts on the fourth line; keep every following line so
            # multi-line notes are not truncated to their first line.
            highlight = "\n".join(raw_clipping_list[3:])

            books = _add_parsed_items_to_books_dict(
                books, title, author, highlight, page, location, date, isNote
            )
        else:
            passed_clippings_count += 1

    print(f"× Passed {passed_clippings_count} bookmarks or unsupported clippings.\n")
    return books
107 |
108 |
109 | def _is_valid_clipping(raw_clipping_list: List) -> bool:
110 | return len(raw_clipping_list) >= 3
111 |
112 |
def _parse_author_and_title(raw_clipping_list: List) -> Tuple[str, str]:
    """Extract the cleaned ``(author, title)`` pair from a clipping's lines."""
    raw_author, raw_title = _parse_raw_author_and_title(raw_clipping_list)
    fixed_author, fixed_title = _deal_with_exceptions_in_author_name(
        raw_author, raw_title
    )
    return fixed_author, _deal_with_exceptions_in_title(fixed_title)
118 |
119 |
120 | def _parse_page_location_date_and_note(
121 | raw_clipping_list: List,
122 | ) -> Tuple[str, str, str, bool]:
123 | second_line = raw_clipping_list[1]
124 | second_line_as_list = second_line.strip().split(" | ")
125 | page = location = date = ""
126 | isNote = False
127 |
128 | for element in second_line_as_list:
129 | element = element.lower()
130 | if "note" in element:
131 | isNote = True
132 | if "page" in element:
133 | page = element[element.find("page") :].replace("page", "").strip()
134 | if "location" in element:
135 | location = (
136 | element[element.find("location") :].replace("location", "").strip()
137 | )
138 | if "added on" in element:
139 | date = parse(
140 | element[element.find("added on") :].replace("added on", "").strip()
141 | )
142 | date = date.strftime("%A, %d %B %Y %I:%M:%S %p")
143 | if "页" in element:
144 | page = element[element.find("第") : element.find("页") + 1].strip()
145 | if "位置" in element:
146 | location = element[element.find("位置") - 1 :].strip()
147 | if "添加于" in element:
148 | date = element
149 |
150 |
151 | return page, location, date, isNote
152 |
153 |
154 | def _add_parsed_items_to_books_dict(
155 | books: Dict,
156 | title: str,
157 | author: str,
158 | highlight: str,
159 | page: str,
160 | location: str,
161 | date: str,
162 | isNote: bool,
163 | ) -> Dict:
164 | if title not in books:
165 | books[title] = {"author": author, "highlights": []}
166 | books[title]["highlights"].append((highlight, page, location, date, isNote))
167 | return books
168 |
169 |
170 | def _parse_raw_author_and_title(raw_clipping_list: List) -> Tuple[str, str]:
171 | author = ""
172 | title = raw_clipping_list[0]
173 |
174 | if re.findall(r"\(.*?\)", raw_clipping_list[0]):
175 | author = (re.findall(r"\(.*?\)", raw_clipping_list[0]))[-1]
176 | author = author.removeprefix("(").removesuffix(")")
177 | else:
178 | print(
179 | "No author found. You can manually add the Author details in the Notion database."
180 | )
181 |
182 | title = raw_clipping_list[0].replace(author, "").strip().replace(" ()", "")
183 |
184 | return author, title
185 |
186 |
187 | def _deal_with_exceptions_in_author_name(author: str, title: str) -> Tuple[str, str]:
188 | if "(" in author:
189 | author = author + ")"
190 | title = title.removesuffix(")")
191 |
192 | if ", " in author and all(x not in author for x in DELIMITERS):
193 | if (author.split(", "))[1] not in ACADEMIC_TITLES:
194 | author = " ".join(reversed(author.split(", ")))
195 |
196 | if "; " in author:
197 | authorList = author.split("; ")
198 | author = ""
199 | for ele in authorList:
200 | author += " ".join(reversed(ele.split(", "))) + ", "
201 | author = author.removesuffix(", ")
202 | return author, title
203 |
204 |
205 | def _deal_with_exceptions_in_title(title: str) -> str:
206 | if ", The" in title:
207 | title = "The " + title.replace(", The", "")
208 | return title
209 |
--------------------------------------------------------------------------------
/kindle2notion/reading.py:
--------------------------------------------------------------------------------
1 | import unicodedata
2 | from pathlib import Path
3 |
4 |
def read_raw_clippings(clippings_file_path: Path) -> str:
    """Read the clippings file and return its NFKD-normalised text.

    The file is decoded as ``utf-8-sig`` so a leading byte-order mark is
    dropped, and it is closed as soon as it has been read.
    """
    with open(clippings_file_path, "r", encoding="utf-8-sig") as clippings_file:
        raw_clippings_text = clippings_file.read()
    return unicodedata.normalize("NFKD", raw_clippings_text)
10 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta:__legacy__"
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | flake8>=3.9.2
2 | pytest>=6.2.4
3 | pytest-cov>=2.12.0
4 | black>=21.5b2
5 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | click>=8.0.0
2 | dateparser>=1.0.0
3 | DateTime>=4.3
4 | python-decouple>=3.3
5 | requests>=2.25.0
6 | pathlib
7 | notion-client
8 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude =
3 | .git,
4 | __pycache__,
5 | venv,
6 | idea,
7 | .venv
8 | max-line-length = 120
9 | inline-quotes = single
10 | multiline-quotes = '''
11 | avoid-escape = True
12 |
13 | [tool:pytest]
14 | testpaths = tests/
15 | norecursedirs = .git venv/ .pytest_cache/ main/
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
def _read_text(path):
    """Return the UTF-8 decoded content of *path*."""
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()


# Package metadata inputs are read from the files next to this script.
long_description = _read_text("README.md")
requirements = _read_text("requirements.txt")
requirements_dev = _read_text("requirements-dev.txt")

setup(
    name="kindle2notion",
    version="0.1.8",
    author="Jeffrey Jacob",
    author_email="jeffreysamjacob@gmail.com",
    description="Export all the clippings from your Kindle device to a database in Notion.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/paperboi/kindle2notion",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    packages=find_packages(),
    install_requires=requirements,
    extras_require={"dev": requirements_dev},
    python_requires=">=3.9",
    entry_points={
        "console_scripts": [
            "kindle2notion = kindle2notion.__main__:main",
        ],
    },
)
36 |
--------------------------------------------------------------------------------
/tests/test_data/Test Clippings.txt:
--------------------------------------------------------------------------------
1 | Title 1: A Great Book (Horowitz, Ben)
2 | - Your Highlight on page 11 | Location 111-114 | Added on Tuesday, September 22, 2020 9:23:48 AM
3 |
4 | This is test highlight 1.
5 | ==========
6 | Title 1: A Great Book (Horowitz, Ben)
7 | - Your Highlight on page 11 | Location 111-114 | Added on Tuesday, September 22, 2020 9:24:04 AM
8 |
9 | This is test highlight 2.
10 | ==========
11 | Title 2 Is Good Too (Bryar, Colin)
12 | - Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM
13 |
14 | This is test highlight 3.
15 | ==========
16 | Title 2 Is Good Too (Bryar, Colin)
17 | - Your Highlight on page 34 | Location 682-684 | Added on Friday, April 30, 2021 3:14:33 PM
18 |
19 | This is test highlight 4.
20 | ==========
21 | Title 3 Is Clean (Robert C. Martin Series) (C., Martin Robert)
22 | - Your Highlight on page 22 | Location 559-560 | Added on Saturday, May 15, 2021 10:25:42 PM
23 |
24 | This is test highlight 5.
25 | ==========
26 | Title 3 Is Clean (Robert C. Martin Series) (C., Martin Robert)
27 | - Your Highlight on page 22 | Location 564-565 | Added on Saturday, May 15, 2021 10:26:26 PM
28 |
29 | This is test highlight 6.
30 | ==========
--------------------------------------------------------------------------------
/tests/test_exporting.py:
--------------------------------------------------------------------------------
1 | from kindle2notion.exporting import _prepare_aggregated_text_for_one_book
2 |
3 |
def test_prepare_aggregated_text_for_one_book_should_return_the_aggregated_text_when_highlight_date_is_disabled():
    # Given: highlights carry five fields (text, page, location, date, is_note)
    highlights = [
        (
            "This is an example highlight.",
            "1",
            "100",
            "Thursday, 29 April 2021 12:31:29 AM",
            False,
        ),
        (
            "This is a second example highlight.",
            "2",
            "200",
            "Friday, 30 April 2021 12:31:29 AM",
            False,
        ),
    ]

    # The aggregated text is plain (no markdown emphasis markers).
    expected = (
        "This is an example highlight.\n(Page: 1 Location: 100)\n\n"
        "This is a second example highlight.\n(Page: 2 Location: 200)\n\n",
        "Friday, 30 April 2021 12:31:29 AM",
    )

    # When
    actual = _prepare_aggregated_text_for_one_book(
        highlights, enable_highlight_date=False
    )

    # Then
    assert expected == actual
34 |
35 |
def test_prepare_aggregated_text_for_one_book_should_return_the_aggregated_text_when_highlight_date_is_enabled():
    # Given: highlights carry five fields (text, page, location, date, is_note)
    highlights = [
        (
            "This is an example highlight.",
            "1",
            "100",
            "Thursday, 29 April 2021 12:31:29 AM",
            False,
        ),
        (
            "This is a second example highlight.",
            "2",
            "200",
            "Friday, 30 April 2021 12:31:29 AM",
            False,
        ),
    ]

    # The aggregated text is plain (no markdown emphasis markers).
    expected = (
        "This is an example highlight.\n"
        "(Page: 1 Location: 100 Date Added: Thursday, 29 April 2021 12:31:29 AM)\n\n"
        "This is a second example highlight.\n"
        "(Page: 2 Location: 200 Date Added: Friday, 30 April 2021 12:31:29 AM)\n\n",
        "Friday, 30 April 2021 12:31:29 AM",
    )

    # When
    actual = _prepare_aggregated_text_for_one_book(
        highlights, enable_highlight_date=True
    )

    # Then
    assert expected == actual
68 |
--------------------------------------------------------------------------------
/tests/test_parsing.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from pathlib import Path
3 |
4 | from kindle2notion.parsing import (
5 | parse_raw_clippings_text,
6 | _parse_author_and_title,
7 | _parse_page_location_date_and_note,
8 | _add_parsed_items_to_books_dict,
9 | )
10 | from kindle2notion.reading import read_raw_clippings
11 |
12 |
def test_parse_raw_clippings_text_should_return_a_dict_with_all_the_parsed_information():
    # Given
    test_clippings_file_path = (
        Path(__file__).parent.absolute() / "test_data/Test Clippings.txt"
    )
    raw_clippings_text = read_raw_clippings(test_clippings_file_path)

    # Each highlight tuple is (text, page, location, date, is_note).
    expected = {
        "Title 1: A Great Book": {
            "author": "Ben Horowitz",
            "highlights": [
                (
                    "This is test highlight 1.",
                    "11",
                    "111-114",
                    "Tuesday, 22 September 2020 09:23:48 AM",
                    False,
                ),
                (
                    "This is test highlight 2.",
                    "11",
                    "111-114",
                    "Tuesday, 22 September 2020 09:24:04 AM",
                    False,
                ),
            ],
        },
        "Title 2 Is Good Too": {
            "author": "Colin Bryar",
            "highlights": [
                (
                    "This is test highlight 3.",
                    "3",
                    "184-185",
                    "Friday, 30 April 2021 12:31:29 AM",
                    False,
                ),
                (
                    "This is test highlight 4.",
                    "34",
                    "682-684",
                    "Friday, 30 April 2021 03:14:33 PM",
                    False,
                ),
            ],
        },
        "Title 3 Is Clean (Robert C. Martin Series)": {
            "author": "Martin Robert C.",
            "highlights": [
                (
                    "This is test highlight 5.",
                    "22",
                    "559-560",
                    "Saturday, 15 May 2021 10:25:42 PM",
                    False,
                ),
                (
                    "This is test highlight 6.",
                    "22",
                    "564-565",
                    "Saturday, 15 May 2021 10:26:26 PM",
                    False,
                ),
            ],
        },
    }

    # When
    actual = parse_raw_clippings_text(raw_clippings_text)

    # Then
    assert expected == actual
79 |
80 |
def test_parse_author_and_title_case_should_parse_the_author_and_title_when_the_author_name_is_formatted_with_a_comma():
    # Given a clipping whose author is written as "Last, First"
    clipping_lines = [
        "Relativity (Einstein, Albert)",
        "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM",
        "",
        "This is a test highlight.",
    ]

    # When
    parsed = _parse_author_and_title(clipping_lines)

    # Then the name is reordered to "First Last"
    assert ("Albert Einstein", "Relativity") == parsed
96 |
97 |
def test_parse_author_and_title_case_should_parse_the_author_and_title_when_the_author_name_is_first_name_last_name():
    # Given a clipping whose author is already written as "First Last"
    clipping_lines = [
        "Relativity (Albert Einstein)",
        "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM",
        "",
        "This is a test highlight.",
    ]

    # When
    parsed = _parse_author_and_title(clipping_lines)

    # Then the name is kept as is
    assert ("Albert Einstein", "Relativity") == parsed
113 |
114 |
def test_parse_author_and_title_case_should_parse_the_author_and_title_when_there_are_parentheses_in_the_author_name():
    # Given an author that itself contains a parenthesised part
    clipping_lines = [
        "Candide (Voltaire (François-Marie Arouet))",
        "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM",
        "",
        "This is a test highlight.",
    ]

    # When
    parsed = _parse_author_and_title(clipping_lines)

    # Then the nested parentheses stay with the author
    assert ("Voltaire (François-Marie Arouet)", "Candide") == parsed
130 |
131 |
def test_parse_author_and_title_case_should_parse_the_author_and_title_when_there_is_a_The_at_the_end_of_the_title():
    # Given a catalogue-style title ending in ", The"
    clipping_lines = [
        "Age of Louis XIV, The (Voltaire (François-Marie Arouet))",
        "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM",
        "",
        "This is a test highlight.",
    ]

    # When
    parsed = _parse_author_and_title(clipping_lines)

    # Then the article is moved to the front of the title
    assert ("Voltaire (François-Marie Arouet)", "The Age of Louis XIV") == parsed
147 |
148 |
def test_parse_author_and_title_case_should_parse_the_author_and_title_when_there_are_parentheses_in_the_title():
    # Given a title that contains its own parenthesised part
    clipping_lines = [
        "The Mysterious Disappearance of Leon (I Mean Noel) (Ellen Raskin)",
        "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM",
        "",
        "This is a test highlight.",
    ]

    # When
    parsed = _parse_author_and_title(clipping_lines)

    # Then only the last parenthesised group is taken as the author
    assert (
        "Ellen Raskin",
        "The Mysterious Disappearance of Leon (I Mean Noel)",
    ) == parsed
164 |
165 |
def test_parse_page_location_date_and_note_should_parse_the_page_location_and_date_when_there_are_all_three():
    # Given a metadata line with page, location and date
    clipping_lines = [
        "Relativity (Albert Einstein)",
        "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM",
        "",
        "This is a test highlight.",
    ]

    # When
    parsed = _parse_page_location_date_and_note(clipping_lines)

    # Then
    assert ("3", "184-185", "Friday, 30 April 2021 12:31:29 AM", False) == parsed
181 |
182 |
def test_parse_page_location_date_and_note_should_parse_the_page_and_location_when_there_is_no_date():
    # Given a metadata line without the "Added on" part
    clipping_lines = [
        "Relativity (Albert Einstein)",
        "- Your Highlight on page 3 | Location 184-185",
        "",
        "This is a test highlight.",
    ]

    # When
    parsed = _parse_page_location_date_and_note(clipping_lines)

    # Then the date is an empty string
    assert ("3", "184-185", "", False) == parsed
198 |
199 |
def test_parse_page_location_date_and_note_should_parse_the_location_and_date_when_there_is_no_page():
    # Given a metadata line without a page
    clipping_lines = [
        "Relativity (Albert Einstein)",
        "Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM",
        "",
        "This is a test highlight.",
    ]

    # When
    parsed = _parse_page_location_date_and_note(clipping_lines)

    # Then the page is an empty string
    assert ("", "184-185", "Friday, 30 April 2021 12:31:29 AM", False) == parsed
215 |
216 |
def test_parse_page_location_date_and_note_should_parse_the_page_and_date_when_there_is_no_location():
    # Given a metadata line without a location
    clipping_lines = [
        "Relativity (Albert Einstein)",
        "- Your Highlight on page 3 | Added on Friday, April 30, 2021 12:31:29 AM",
        "",
        "This is a test highlight.",
    ]

    # When
    parsed = _parse_page_location_date_and_note(clipping_lines)

    # Then the location is an empty string
    assert ("3", "", "Friday, 30 April 2021 12:31:29 AM", False) == parsed
232 |
233 |
def test_add_parsed_items_to_books_dict_should_add_the_parsed_items_when_the_book_is_not_already_in_the_books_dict():
    # Given
    books = {}
    title = "Relativity"
    author = "Albert Einstein"
    highlight = "This is a first highlight."
    page = "1"
    location = "100"
    date = datetime(2021, 4, 30, 0, 31, 29)
    # The function requires the note flag as its last argument.
    is_note = False

    expected = {
        "Relativity": {
            "author": "Albert Einstein",
            "highlights": [
                (
                    "This is a first highlight.",
                    "1",
                    "100",
                    datetime(2021, 4, 30, 0, 31, 29),
                    False,
                )
            ],
        }
    }

    # When
    actual = _add_parsed_items_to_books_dict(
        books, title, author, highlight, page, location, date, is_note
    )

    # Then
    assert expected == actual
265 |
266 |
def test_add_parsed_items_to_books_dict_should_add_the_parsed_items_when_the_book_is_already_in_the_books_dict():
    # Given a books dict that already holds one highlight for the title
    books = {
        "Relativity": {
            "author": "Albert Einstein",
            "highlights": [
                (
                    "This is a first highlight.",
                    "1",
                    "100",
                    datetime(2021, 4, 30, 0, 31, 29),
                    False,
                )
            ],
        }
    }
    title = "Relativity"
    author = "Albert Einstein"
    highlight = "This is a second highlight."
    page = "2"
    location = "200"
    date = datetime(2021, 5, 1, 0, 31, 29)
    # The function requires the note flag as its last argument.
    is_note = False

    expected = {
        "Relativity": {
            "author": "Albert Einstein",
            "highlights": [
                (
                    "This is a first highlight.",
                    "1",
                    "100",
                    datetime(2021, 4, 30, 0, 31, 29),
                    False,
                ),
                (
                    "This is a second highlight.",
                    "2",
                    "200",
                    datetime(2021, 5, 1, 0, 31, 29),
                    False,
                ),
            ],
        }
    }

    # When
    actual = _add_parsed_items_to_books_dict(
        books, title, author, highlight, page, location, date, is_note
    )

    # Then
    assert expected == actual
316 |
--------------------------------------------------------------------------------