├── .github
    └── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── feature_request.md
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── k2n.code-workspace
├── kindle2notion
    ├── __init__.py
    ├── __main__.py
    ├── exporting.py
    ├── parsing.py
    └── reading.py
├── requirements-dev.txt
├── requirements.txt
├── setup.cfg
├── setup.py
└── tests
    ├── test_data
        └── Test Clippings.txt
    ├── test_exporting.py
    ├── test_parsing.py
    └── test_reading.py


/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behaviour:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behaviour**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Browser [e.g. chrome, safari]
29 |  - Version [e.g. 22]
30 | 
31 | **Additional context**
32 | Add any other context about the problem here.
33 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by https://www.toptal.com/developers/gitignore/api/python
  2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
  3 | 
  4 | ### Python ###
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | pip-wheel-metadata/
 26 | share/python-wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .nox/
 46 | .coverage
 47 | .coverage.*
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | *.cover
 52 | *.py,cover
 53 | .hypothesis/
 54 | .pytest_cache/
 55 | pytestdebug.log
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | doc/_build/
 77 | 
 78 | # PyBuilder
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # poetry
 99 | #poetry.lock
100 | 
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 | 
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 | 
108 | # SageMath parsed files
109 | *.sage.py
110 | 
111 | # Environments
112 | # .env
113 | .env
114 | .venv/
115 | env/
116 | venv/
117 | ENV/
118 | env.bak/
119 | venv.bak/
120 | pythonenv*
121 | 
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 | 
126 | # Rope project settings
127 | .ropeproject
128 | 
129 | # mkdocs documentation
130 | /site
131 | 
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 | 
137 | # Pyre type checker
138 | .pyre/
139 | 
140 | # pytype static type analyzer
141 | .pytype/
142 | 
143 | # operating system-related files
144 | # file properties cache/storage on macOS
145 | *.DS_Store
146 | # thumbnail cache on Windows
147 | Thumbs.db
148 | 
149 | # profiling data
150 | .prof
151 | 
152 | 
153 | # End of https://www.toptal.com/developers/gitignore/api/python
154 | 
155 | 
156 | # IDE
157 | .vscode/
158 | .idea/
159 | .code-workspace
160 | # custom project files
161 | MyClippings.txt
162 | dist/
163 | images/
164 | my_kindle_clippings.json


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Jeffrey Jacob
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include requirements-dev.txt
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- PROJECT OVERVIEW -->
  3 | <p align="center">
  4 |   <img width="500" src="https://i.imgur.com/mJOjtvo.png">
  5 | </p>
  6 | <!-- <h1 align="center">Kindle2Notion</h1> -->
  7 | <p align="center">
  8 |   A program to copy all your Kindle highlights and notes to a page in Notion. 
  9 |   <br />
 10 |   <a href="https://github.com/paperboi/Kindle2Notion">Explore the docs</a>
 11 |   ·
 12 |   <a href="https://github.com/paperboi/Kindle2Notion/issues">File issues and feature requests here</a>
 13 | </p>
 14 | <p align="center">
 15 |   If you found this script helpful or appreciate my work, you can support me here:
 16 |   <br><br>
 17 |   <a href="https://www.producthunt.com/posts/kindle2notion?utm_source=badge-featured&utm_medium=badge&utm_souce=badge-kindle2notion" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=295918&theme=light" alt="Kindle2Notion - Export your Kindle clippings to a Notion database. | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
 18 |   <a href="https://www.buymeacoffee.com/jeffreyjacob" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 54px;" height="54"></a>
 19 | </p>
 20 | 
 21 | [![Downloads][downloads-shield]][downloads-url]
 22 | [![Contributors][contributors-shield]][contributors-url]
 23 | [![Forks][forks-shield]][forks-url]
 24 | [![Stargazers][stars-shield]][stars-url]
 25 | [![Issues][issues-shield]][issues-url]
 26 | [![MIT License][license-shield]][license-url]
 27 | [![LinkedIn][linkedin-shield]][linkedin-url]
 28 | 
 29 | <!-- TABLE OF CONTENTS -->
 30 | ## Table of Contents
 31 | 
 32 | - [Table of Contents](#table-of-contents)
 33 | - [About The Project](#about-the-project)
 34 | - [Getting Started](#getting-started)
 35 |   - [Prerequisites](#prerequisites)
 36 |   - [Installation & Setup](#installation--setup)
 37 | - [Usage](#usage)
 38 | - [Roadmap](#roadmap)
 39 | - [Contributing](#contributing)
 40 | - [License](#license)
 41 | - [Contact](#contact)
 42 | 
 43 | 
 44 | 
 45 | <!-- ABOUT THE PROJECT -->
 46 | ## About The Project
 47 | 
 48 | ![Kindle2Notion Demo][product-demo]
 49 | 
 50 | A Python package to export all the clippings from your Kindle device to a page in Notion. Run this script whenever you plug in your Kindle device to your PC.
 51 | 
 52 | A key inspiration behind this project was the notes saving feature on Google Play Books, which automatically syncs all your highlights from a book hosted on the service to a Google Doc in real time. I wanted a similar feature for my Kindle and this project is one step towards a solution for this problem.
 53 | 
 54 | **Intended for**
 55 | - Avid readers who would want to browse through their prior reads and highlights anytime anywhere.
 56 | - For those who like to take notes alongside their highlights.
 57 | 
 58 | 
 59 | <!-- GETTING STARTED -->
 60 | ## Getting Started
 61 | 
 62 | 
 63 | > **NOTE**
 64 | > Need a step-by-step guide to setting this package up? Click [here](https://kindle2notion.notion.site/Kindle2Notion-8a9683c9b19546c3b1cf42a68aceebee) for the full guide. 
 65 | 
 66 | To get a local copy up and running follow these simple steps:
 67 | 
 68 | ### Prerequisites
 69 | 
 70 | * A Kindle device.
 71 | * A Notion account to store your highlights and notes.
 72 | * Python 3 on your system to run the code.
 73 | 
 74 | ### Installation & Setup
 75 | 
 76 | > **NOTE** 
 77 | > As of 10-07-2022, the latest update to this package relies on the official Notion API for sending API requests. This requires you to create an integration token from [here](https://www.notion.so/my-integrations). For old users, you'd have to switch to this method as well since `notion-py` isn't being maintained anymore.
 78 |  
 79 | 1. Install the library.
 80 |     ```sh
 81 |     pip install kindle2notion
 82 |     ```
 83 | 2. Create an integration on Notion.
 84 |       1. Duplicate this [database template](https://kindle2notion.notion.site/6d26062e3bb04dd89b988806978c1fe7?v=0d394a8162cc481280966b35a37465c2) to the workspace you want to use for storing your Kindle clippings.
 85 |       2. Open _Settings & Members_ from the left navigation bar.
 86 |       3. Select the _Integrations_ option listed under _Workspaces_ in the settings modal.
 87 |       4. Click on _Develop your own integrations_ to redirect to the integrations page.
 88 |       5. On the integrations page, select the _New integration_ option and enter the name of the integration and the workspace you want to use it with. Hit submit and your internal integration token will be generated.
 89 | 3. Go back to your database page and click on the _Share_ button on the top right corner. Use the selector to find your integration by its name and then click _Invite_. Your integration now has the requested permissions on the new database. 
 90 | 
 91 | 
 92 | <!-- USAGE EXAMPLES -->
 93 | ## Usage
 94 | 
 95 | 1. Plug in your Kindle device to your PC.
 96 |     
 97 | 2. You need the following three arguments in hand before running the code:
 98 |    1. Take `your_notion_auth_token` from the secret key bearer token provided.
 99 |    2. Find `your_notion_database_id` from the URL of the database you have copied to your workspace. For reference,
100 |       ```
101 |       https://www.notion.so/myworkspace/a8aec43384f447ed84390e8e42c2e089?v=...
102 |                                         |--------- Database ID --------|
103 |       ```
104 |    3. `your_kindle_clippings_file` is the path to the `My Clippings.txt` file on your Kindle.
105 | 
106 | 3. Additionally, you may modify some default parameters of the command-line with the following options of the CLI:
107 |    - ```--enable_highlight_date```  Set to False if you don't want to see the "Date Added" information in Notion.
108 |    - ```--enable_book_cover```      Set to False if you don't want to store the book cover in Notion.
109 |     
110 | 4. Export your Kindle highlights and notes to Notion!
111 |    - On MacOS and UNIX,
112 |    ```sh
113 |    kindle2notion 'your_notion_auth_token' 'your_notion_database_id' 'your_kindle_clippings_file'
114 |    ```
115 |    - On Windows
116 |    ```sh
117 |    python -m kindle2notion 'your_notion_auth_token' 'your_notion_database_id' 'your_kindle_clippings_file'
118 |    ```
119 | You can also get help with either of the following commands:
120 |    ```sh
121 |    kindle2notion --help
122 |    python -m kindle2notion --help
123 |    ```
124 | 
125 | > **NOTE**
126 | > This code has been tested on a 4th Gen Kindle Paperwhite on both MacOS and Windows.
127 | 
128 | 
129 | <!-- ROADMAP -->
130 | ## Roadmap
131 | 
132 | See the [open issues](https://github.com/paperboi/Kindle2Notion/issues) for a list of proposed features (and known issues).
133 | 
134 | 
135 | 
136 | <!-- CONTRIBUTING -->
137 | ## Contributing
138 | 
139 | <!-- Contributions are what make the open source community such an amazing place to be learn, inspire, and create. -->
140 | Any contributions you make are **greatly appreciated**.
141 | 
142 | 1. Fork the Project
143 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
144 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
145 | 4. Push to the Branch (`git push origin feature/AmazingFeature`)
146 | 5. Open a Pull Request
147 | 
148 | 
149 | 
150 | <!-- LICENSE -->
151 | ## License
152 | 
153 | Distributed under the MIT License. See [LICENSE][license-url] for more information.
154 | 
155 | 
156 | 
157 | <!-- CONTACT -->
158 | ## Contact
159 | 
160 | Jeffrey Jacob ([Twitter](https://twitter.com/jeffreysamjacob) | [Email](mailto:jeffreysamjacob@gmail.com) | [LinkedIn](https://www.linkedin.com/in/jeffreysamjacob/))
161 | 
162 | 
163 | [downloads-shield]: https://pepy.tech/badge/kindle2notion
164 | [downloads-url]: https://pepy.tech/project/kindle2notion
165 | [contributors-shield]: https://img.shields.io/github/contributors/paperboi/Kindle2Notion.svg?style=flat-square
166 | [contributors-url]: https://github.com/paperboi/Kindle2Notion/graphs/contributors
167 | [forks-shield]: https://img.shields.io/github/forks/paperboi/Kindle2Notion.svg?style=flat-square
168 | [forks-url]: https://github.com/paperboi/Kindle2Notion/network/members
169 | [stars-shield]: https://img.shields.io/github/stars/paperboi/Kindle2Notion.svg?style=flat-square
170 | [stars-url]: https://github.com/paperboi/Kindle2Notion/stargazers
171 | [issues-shield]: https://img.shields.io/github/issues/paperboi/Kindle2Notion.svg?style=flat-square
172 | [issues-url]: https://github.com/paperboi/Kindle2Notion/issues
173 | [license-shield]: https://img.shields.io/github/license/paperboi/Kindle2Notion.svg?style=flat-square
174 | [license-url]: https://github.com/paperboi/kindle2notion/blob/master/LICENSE
175 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=flat-square&logo=linkedin&colorB=555
176 | [linkedin-url]: https://www.linkedin.com/in/jeffreysamjacob/
177 | [product-demo]: https://i.imgur.com/IlDmEOy.gif
178 | 


--------------------------------------------------------------------------------
/k2n.code-workspace:
--------------------------------------------------------------------------------
1 | {
2 |     "folders": [
3 |         {
4 |             "path": "."
5 |         }
6 |     ]
7 | }


--------------------------------------------------------------------------------
/kindle2notion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paperboi/kindle2notion/c9ca527d2e98167e327893652e8d153908e52fd4/kindle2notion/__init__.py


--------------------------------------------------------------------------------
/kindle2notion/__main__.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | import click
 4 | import notional
 5 | 
 6 | from kindle2notion.exporting import export_to_notion
 7 | from kindle2notion.parsing import parse_raw_clippings_text
 8 | from kindle2notion.reading import read_raw_clippings
 9 | 
10 | 
@click.command()
@click.argument("notion_api_auth_token")
@click.argument("notion_database_id")
@click.argument("clippings_file")
@click.option(
    "--enable_location",
    default=True,
    help='Set to False if you don\'t want to see the "Location" and "Page" information in Notion.',
)
@click.option(
    "--enable_highlight_date",
    default=True,
    help='Set to False if you don\'t want to see the "Date Added" information in Notion.',
)
@click.option(
    "--enable_book_cover",
    default=True,
    help="Set to False if you don't want to store the book cover in Notion.",
)
@click.option(
    "--separate_blocks",
    default=False,
    help='Set to True to separate each clipping into a separate quote block. Enabling this option significantly decreases upload speed.',
)
def main(
    notion_api_auth_token,
    notion_database_id,
    clippings_file,
    enable_location,
    enable_highlight_date,
    enable_book_cover,
    separate_blocks,
):
    # CLI entry point: read the clippings file, parse it into books, export
    # the books to the Notion database and keep a local JSON copy.
    # (Deliberately a comment rather than a docstring: click would otherwise
    # show a docstring as the command's --help text.)
    client = notional.connect(auth=notion_api_auth_token)
    database = client.databases.retrieve(notion_database_id)

    # Bail out early when the database lookup yields nothing usable.
    if not database:
        print(
            "Notion page not found! Please check whether the Notion database ID is assigned properly."
        )
        return

    print("Notion page is found. Analyzing clippings file...")

    # Load the raw text of the clippings file.
    raw_text = read_raw_clippings(clippings_file)

    # Turn the raw text into the per-book structure to be sent to the Notion DB.
    books = parse_raw_clippings_text(raw_text)

    # Upload every parsed book to the Notion DB.
    export_to_notion(
        all_books=books,
        enable_location=enable_location,
        enable_highlight_date=enable_highlight_date,
        enable_book_cover=enable_book_cover,
        separate_blocks=separate_blocks,
        notion_api_auth_token=notion_api_auth_token,
        notion_database_id=notion_database_id,
    )

    # Keep a local JSON snapshot of what was parsed.
    with open("my_kindle_clippings.json", "w") as out_file:
        json.dump(books, out_file, indent=4)

    print("Transfer complete... Exiting script...")


if __name__ == "__main__":
    main()
80 | 


--------------------------------------------------------------------------------
/kindle2notion/exporting.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from typing import Dict, List, Tuple
  3 | 
  4 | import notional
  5 | from notional.blocks import Paragraph, TextObject, Quote
  6 | from notional.query import TextCondition
  7 | from notional.types import Date, ExternalFile, Number, RichText, Title
  8 | from requests import get
  9 | 
 10 | # from notional.text import Annotations
 11 | 
 12 | # from more_itertools import grouper
 13 | 
 14 | 
# Placeholder image URL used as the page cover when no book cover can be found.
NO_COVER_IMG = "https://via.placeholder.com/150x200?text=No%20Cover"
 16 | 
 17 | 
 18 | def export_to_notion(
 19 |     all_books: Dict,
 20 |     enable_location: bool,
 21 |     enable_highlight_date: bool,
 22 |     enable_book_cover: bool,
 23 |     separate_blocks: bool,
 24 |     notion_api_auth_token: str,
 25 |     notion_database_id: str,
 26 | ) -> None:
 27 |     print("Initiating transfer...\n")
 28 | 
 29 |     for title in all_books:
 30 |         each_book = all_books[title]
 31 |         author = each_book["author"]
 32 |         clippings = each_book["highlights"]
 33 |         clippings_count = len(clippings)
 34 |         (
 35 |             formatted_clippings,
 36 |             last_date,
 37 |         ) = _prepare_aggregated_text_for_one_book(clippings, enable_location, enable_highlight_date)
 38 |         message = _add_book_to_notion(
 39 |             title,
 40 |             author,
 41 |             clippings_count,
 42 |             formatted_clippings,
 43 |             last_date,
 44 |             notion_api_auth_token,
 45 |             notion_database_id,
 46 |             enable_book_cover,
 47 |             separate_blocks,
 48 |         )
 49 |         if message != "None to add":
 50 |             print("✓", message)
 51 | 
 52 | 
 53 | def _prepare_aggregated_text_for_one_book(
 54 |         clippings: List, enable_location: bool, enable_highlight_date: bool
 55 | ) -> Tuple[str, str]:
 56 |     # TODO: Special case for books with len(clippings) >= 100 characters. Character limit in a Paragraph block in Notion is 100
 57 |     formatted_clippings = []
 58 |     for each_clipping in clippings:
 59 |         aggregated_text = ""
 60 |         text = each_clipping[0]
 61 |         page = each_clipping[1]
 62 |         location = each_clipping[2]
 63 |         date = each_clipping[3]
 64 |         is_note = each_clipping[4]
 65 |         if is_note == True:
 66 |             aggregated_text += "> " + "NOTE: \n"
 67 | 
 68 |         aggregated_text += text + "\n"
 69 |         if enable_location:
 70 |             if page != "":
 71 |                 aggregated_text += "Page: " + page + ", "
 72 |             if location != "":
 73 |                 aggregated_text += "Location: " + location
 74 |         if enable_highlight_date and (date != ""):
 75 |             aggregated_text += ", Date Added: " + date
 76 | 
 77 |         aggregated_text = aggregated_text.strip() + "\n\n"
 78 |         formatted_clippings.append(aggregated_text)
 79 |         last_date = date
 80 |     return formatted_clippings, last_date
 81 | 
 82 | 
 83 | def _add_book_to_notion(
 84 |     title: str,
 85 |     author: str,
 86 |     clippings_count: int,
 87 |     formatted_clippings: list,
 88 |     last_date: str,
 89 |     notion_api_auth_token: str,
 90 |     notion_database_id: str,
 91 |     enable_book_cover: bool,
 92 |     separate_blocks: bool,
 93 | ):
 94 |     notion = notional.connect(auth=notion_api_auth_token)
 95 |     last_date = datetime.strptime(last_date, "%A, %d %B %Y %I:%M:%S %p")
 96 | 
 97 |     # Condition variables
 98 |     title_exists = False
 99 |     current_clippings_count = 0
100 | 
101 |     query = (
102 |         notion.databases.query(notion_database_id)
103 |         .filter(property="Title", rich_text=TextCondition(equals=title))
104 |         .limit(1)
105 |     )
106 |     data = query.first()
107 | 
108 |     if data:
109 |         title_exists = True
110 |         block_id = data.id
111 |         block = notion.pages.retrieve(block_id)
112 |         if block["Highlights"] == None:
113 |             block["Highlights"] = Number[0]
114 |         elif block["Highlights"] == clippings_count:  # if no change in clippings
115 |             title_and_author = str(block["Title"]) + " (" + str(block["Author"]) + ")"
116 |             print(title_and_author)
117 |             print("-" * len(title_and_author))
118 |             return "None to add.\n"
119 | 
120 |     title_and_author = title + " (" + str(author) + ")"
121 |     print(title_and_author)
122 |     print("-" * len(title_and_author))
123 | 
124 |     # Add a new book to the database
125 |     if not title_exists:
126 |         new_page = notion.pages.create(
127 |             parent=notion.databases.retrieve(notion_database_id),
128 |             properties={
129 |                 "Title": Title[title],
130 |                 "Author": RichText[author],
131 |                 "Highlights": Number[clippings_count],
132 |                 "Last Highlighted": Date[last_date.isoformat()],
133 |                 "Last Synced": Date[datetime.now().isoformat()],
134 |             },
135 |             children=[],
136 |         )
137 |         # page_content = _update_book_with_clippings(formatted_clippings)
138 | 
139 | 
140 |         if separate_blocks:
141 |             for formatted_clipping in formatted_clippings:
142 |                 page_content = Quote[formatted_clipping.strip()]
143 |                 notion.blocks.children.append(new_page, page_content)
144 |         else:
145 |             page_content = Paragraph["".join(formatted_clippings)]
146 |             notion.blocks.children.append(new_page, page_content)
147 | 
148 |         block_id = new_page.id
149 |         if enable_book_cover:
150 |             # Fetch a book cover from Google Books if the cover for the page is not set
151 |             if new_page.cover is None:
152 |                 result = _get_book_cover_uri(title, author)
153 | 
154 |             if result is None:
155 |                 # Set the page cover to a placeholder image
156 |                 cover = ExternalFile[NO_COVER_IMG]
157 |                 print(
158 |                     "× Book cover couldn't be found. "
159 |                     "Please replace the placeholder image with the original book cover manually."
160 |                 )
161 |             else:
162 |                 # Set the page cover to that of the book
163 |                 cover = ExternalFile[result]
164 |                 print("✓ Added book cover.")
165 | 
166 |             notion.pages.set(new_page, cover=cover)
167 |     else:
168 |         # update a book that already exists in the database
169 |         page = notion.pages.retrieve(block_id)
170 |         # page_content = _update_book_with_clippings(formatted_clippings)
171 |         page_content = Paragraph["".join(formatted_clippings)]
172 |         notion.blocks.children.append(page, page_content)
173 |         # TODO: Delete existing page children (or figure out how to find changes to be made by comparing it with local json file.)
174 |         current_clippings_count = int(float(str(page["Highlights"])))
175 |         page["Highlights"] = Number[clippings_count]
176 |         page["Last Highlighted"] = Date[last_date.isoformat()]
177 |         page["Last Synced"] = Date[datetime.now().isoformat()]
178 | 
179 |     # Logging the changes made
180 |     diff_count = (
181 |         clippings_count - current_clippings_count
182 |         if clippings_count > current_clippings_count
183 |         else clippings_count
184 |     )
185 |     message = str(diff_count) + " notes/highlights added successfully.\n"
186 | 
187 |     return message
188 | 
189 | 
190 | # def _create_rich_text_object(text):
191 | #     if "Note: " in text:
192 | #         # Bold text
193 | #         nested = TextObject._NestedData(content=text)
194 | #         rich = TextObject(text=nested, plain_text=text, annotations=Annotations(bold=True))
195 | #     elif any(item in text for item in ["Page: ", "Location: ", "Date Added: "]):
196 | #         # Italic text
197 | #         nested = TextObject._NestedData(content=text)
198 | #         rich = TextObject(text=nested, plain_text=text, annotations=Annotations(italic=True))
199 | #     else:
200 | #         # Plain text
201 | #         nested = TextObject._NestedData(content=text)
202 | #         rich = TextObject(text=nested, plain_text=text)
203 | #     return rich
204 | 
205 | 
206 | # def _update_book_with_clippings(formatted_clippings):
207 | #     rtf = []
208 | #     for each_clipping in formatted_clippings:
209 | #         each_clipping_list = each_clipping.split("*")
210 | #         each_clipping_list = list(filter(None, each_clipping_list))
211 | #         for each_line in each_clipping_list:
212 | #             rtf.append(_create_rich_text_object(each_line))
213 | #     print(len(rtf))
214 | #     content = Paragraph._NestedData(rich_text=rtf)
215 | #     para = Paragraph(paragraph=content)
216 | #     return para
217 | 
218 | 
219 | def _get_book_cover_uri(title: str, author: str):
220 |     req_uri = "https://www.googleapis.com/books/v1/volumes?q="
221 | 
222 |     if title is None:
223 |         return
224 |     req_uri += "intitle:" + title
225 | 
226 |     if author is not None:
227 |         req_uri += "+inauthor:" + author
228 | 
229 |     response = get(req_uri).json().get("items", [])
230 |     if len(response) > 0:
231 |         for x in response:
232 |             if x.get("volumeInfo", {}).get("imageLinks", {}).get("thumbnail"):
233 |                 return (
234 |                     x.get("volumeInfo", {})
235 |                     .get("imageLinks", {})
236 |                     .get("thumbnail")
237 |                     .replace("http://", "https://")
238 |                 )
239 |     return
240 | 


--------------------------------------------------------------------------------
/kindle2notion/parsing.py:
--------------------------------------------------------------------------------
  1 | from re import findall
  2 | from typing import Dict, List, Tuple
  3 | 
  4 | from dateparser import parse
  5 | 
# Module-level list, initially empty.
# NOTE(review): presumably collects books for which no author could be
# parsed -- its use is not visible in this part of the file; confirm.
BOOKS_WO_AUTHORS = []

# Abbreviations of academic degrees/titles.
# NOTE(review): presumably consulted while cleaning up author names -- the
# code that reads this list is not visible here; confirm.
ACADEMIC_TITLES = [
    "A.A.",
    "A.S.",
    "A.A.A.",
    "A.A.S.",
    "A.B.",
    "A.D.N.",
    "A.M.",
    "A.M.T.",
    "C.E.",
    "Ch.E.",
    "D.A.",
    "D.A.S.",
    "D.B.A.",
    "D.C.",
    "D.D.",
    "D.Ed.",
    "D.L.S.",
    "D.M.D.",
    "D.M.S.",
    "D.P.A.",
    "D.P.H.",
    "D.R.E.",
    "D.S.W.",
    "D.Sc.",
    "D.V.M.",
    "Ed.D.",
    "Ed.S.",
    "E.E.",
    "E.M.",
    "E.Met.",
    "I.E.",
    "J.D.",
    "J.S.D.",
    "L.H.D.",
    "Litt.B.",
    "Litt.M.",
    "LL.B.",
    "LL.D.",
    "LL.M.",
    "M.A.",
    "M.Aero.E.",
    "M.B.A.",
    "M.C.S.",
    "M.D.",
    "M.Div.",
    "M.E.",
    "M.Ed.",
    "M.Eng.",
    "M.F.A.",
    "M.H.A.",
    "M.L.S.",
    "M.Mus.",
    "M.N.",
    "M.P.A.",
    "M.S.",
    "M.S.Ed.",
    "M.S.W.",
    "M.Th.",
    "Nuc.E.",
    "O.D.",
    "Pharm.D.",
    "Ph.B.",
    "Ph.D.",
    "S.B.",
    "Sc.D.",
    "S.J.D.",
    "S.Sc.D.",
    "Th.B.",
    "Th.D.",
    "Th.M.",
]

# Separator strings.
# NOTE(review): presumably the separators between multiple author names --
# the code that reads this list is not visible here; confirm.
DELIMITERS = ["; ", " & ", " and "]
 82 | 
 83 | 
 84 | def parse_raw_clippings_text(raw_clippings_text: str) -> Dict:
 85 |     raw_clippings_list = raw_clippings_text.split("==========")
 86 |     print(f"Found {len(raw_clippings_list)} notes and highlights.\n")
 87 | 
 88 |     all_books = {}
 89 |     passed_clippings_count = 0
 90 | 
 91 |     for each_raw_clipping in raw_clippings_list:
 92 |         raw_clipping_list = each_raw_clipping.strip().split("\n")
 93 | 
 94 |         if _is_valid_clipping(raw_clipping_list):
 95 |             author, title = _parse_author_and_title(raw_clipping_list)
 96 |             page, location, date, is_note = _parse_page_location_date_and_note(
 97 |                 raw_clipping_list
 98 |             )
 99 |             highlight = raw_clipping_list[3]
100 | 
101 |             all_books = _add_parsed_items_to_all_books_dict(
102 |                 all_books, title, author, highlight, page, location, date, is_note
103 |             )
104 |         else:
105 |             passed_clippings_count += 1
106 | 
107 |     print(f"× Passed {passed_clippings_count} bookmarks or unsupported clippings.\n")
108 |     return all_books
109 | 
110 | 
111 | def _is_valid_clipping(raw_clipping_list: List) -> bool:
112 |     return len(raw_clipping_list) >= 3
113 | 
114 | 
def _parse_author_and_title(raw_clipping_list: List) -> Tuple[str, str]:
    """Extract the cleaned-up author and title of a split clipping.

    The raw values are parsed first and then passed through the author-name
    and the title exception handling, in that order.

    Returns:
        The (author, title) pair.
    """
    raw_author, raw_title = _parse_raw_author_and_title(raw_clipping_list)
    author, adjusted_title = _deal_with_exceptions_in_author_name(
        raw_author, raw_title
    )
    return author, _deal_with_exceptions_in_title(adjusted_title)
120 | 
121 | 
122 | def _parse_page_location_date_and_note(
123 |     raw_clipping_list: List,
124 | ) -> Tuple[str, str, str, bool]:
125 |     second_line = raw_clipping_list[1]
126 |     second_line_as_list = second_line.strip().split(" | ")
127 |     page = location = date = ""
128 |     is_note = False
129 | 
130 |     for element in second_line_as_list:
131 |         element = element.lower()
132 |         if "note" in element:
133 |             is_note = True
134 |         if "page" in element:
135 |             page = element[element.find("page") :].replace("page", "").strip()
136 |         if "location" in element:
137 |             location = (
138 |                 element[element.find("location") :].replace("location", "").strip()
139 |             )
140 |         if "added on" in element:
141 |             date = parse(
142 |                 element[element.find("added on") :].replace("added on", "").strip()
143 |             )
144 |             date = date.strftime("%A, %d %B %Y %I:%M:%S %p")
145 | 
146 |     return page, location, date, is_note
147 | 
148 | 
149 | def _add_parsed_items_to_all_books_dict(
150 |     all_books: Dict,
151 |     title: str,
152 |     author: str,
153 |     highlight: str,
154 |     page: str,
155 |     location: str,
156 |     date: str,
157 |     is_note: bool,
158 | ) -> Dict:
159 |     if title not in all_books:
160 |         all_books[title] = {"author": author, "highlights": []}
161 |     all_books[title]["highlights"].append((highlight, page, location, date, is_note))
162 |     return all_books
163 | 
164 | 
165 | def _parse_raw_author_and_title(raw_clipping_list: List) -> Tuple[str, str]:
166 |     author = ""
167 |     title = raw_clipping_list[0]
168 | 
169 |     if findall(r"\(.*?\)", raw_clipping_list[0]):
170 |         author = (findall(r"\(.*?\)", raw_clipping_list[0]))[-1]
171 |         author = author.removeprefix("(").removesuffix(")")
172 |     else:
173 |         if title not in BOOKS_WO_AUTHORS:
174 |             BOOKS_WO_AUTHORS.append(title)
175 |             print(
176 |                 f"{title} - No author found. You can manually add the author in the Notion database."
177 |             )
178 | 
179 |     title = raw_clipping_list[0].replace(author, "").strip().replace(" ()", "")
180 | 
181 |     return author, title
182 | 
183 | 
184 | def _deal_with_exceptions_in_author_name(author: str, title: str) -> Tuple[str, str]:
185 |     if "(" in author:
186 |         author = author + ")"
187 |         title = title.removesuffix(")")
188 | 
189 |     if ", " in author and all(x not in author for x in DELIMITERS):
190 |         if (author.split(", "))[1] not in ACADEMIC_TITLES:
191 |             author = " ".join(reversed(author.split(", ")))
192 | 
193 |     if "; " in author:
194 |         authorList = author.split("; ")
195 |         author = ""
196 |         for ele in authorList:
197 |             author += " ".join(reversed(ele.split(", "))) + ", "
198 |         author = author.removesuffix(", ")
199 |     return author, title
200 | 
201 | 
202 | def _deal_with_exceptions_in_title(title: str) -> str:
203 |     if ", The" in title:
204 |         title = "The " + title.replace(", The", "")
205 |     return title
206 | 


--------------------------------------------------------------------------------
/kindle2notion/reading.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | 
 4 | def read_raw_clippings(clippings_file_path: Path) -> str:
 5 |     try:
 6 |         with open(clippings_file_path, "r", encoding="utf-8-sig") as raw_clippings_file:
 7 |             raw_clippings_text = raw_clippings_file.read()
 8 |         raw_clippings_text = raw_clippings_text.replace(u"\ufeff", "")
 9 |         raw_clippings_text_decoded = raw_clippings_text.encode(
10 |             "ascii", errors="ignore"
11 |         ).decode()
12 |     except UnicodeEncodeError as e:
13 |         print(e)
14 | 
15 |     return raw_clippings_text_decoded
16 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | flake8>=3.9.2
2 | pytest>=6.2.4
3 | pytest-cov>=2.12.0
4 | black>=21.5b2
5 | isort>=5.10.1
6 | interrogate>=1.5.0


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | click>=8.0.0
2 | notional>=0.4.1
3 | pathlib
4 | dateparser>=1.0.0
5 | requests>=2.25.0


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | exclude =
 3 |     .git,
 4 |     __pycache__,
 5 |     venv,
 6 |     idea,
 7 |     .venv
 8 | max-line-length = 120
 9 | inline-quotes = single
10 | multiline-quotes = '''
11 | avoid-escape = True
12 | ignore=E203,E225,W293,W503
13 | 
14 | [tool:pytest]
15 | testpaths = tests/
16 | norecursedirs = .git venv/ .pytest_cache/ main/


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import find_packages, setup
 2 | 
 3 | with open("README.md", "r", encoding="utf-8") as f:
 4 |     long_description = f.read()
 5 | 
 6 | with open("requirements.txt", "r", encoding="utf-8") as f:
 7 |     requirements = f.read()
 8 | 
 9 | with open("requirements-dev.txt", "r", encoding="utf-8") as f:
10 |     requirements_dev = f.read()
11 | 
12 | setup(
13 |     name="kindle2notion",
14 |     version="1.0.1",
15 |     author="Jeffrey Jacob",
16 |     author_email="jeffreysamjacob@gmail.com",
17 |     description="Export all the clippings from your Kindle device to a database in Notion.",
18 |     long_description=long_description,
19 |     long_description_content_type="text/markdown",
20 |     url="https://github.com/paperboi/kindle2notion",
21 |     classifiers=[
22 |         "Programming Language :: Python :: 3",
23 |         "License :: OSI Approved :: MIT License",
24 |         "Operating System :: OS Independent",
25 |     ],
26 |     packages=find_packages(),
27 |     install_requires=requirements,
28 |     extras_require={"dev": requirements_dev},
29 |     python_requires=">=3.9",
30 |     entry_points={
31 |         "console_scripts": [
32 |             "kindle2notion = kindle2notion.__main__:main",
33 |         ],
34 |     },
35 | )
36 | 


--------------------------------------------------------------------------------
/tests/test_data/Test Clippings.txt:
--------------------------------------------------------------------------------
 1 | ﻿Title 1: A Great Book (Horowitz, Ben)
 2 | - Your Highlight on page 11 | Location 111-114 | Added on Tuesday, September 22, 2020 9:23:48 AM
 3 | 
 4 | This is test highlight 1.
 5 | ==========
 6 | Title 1: A Great Book (Horowitz, Ben)
 7 | - Your Highlight on page 11 | Location 111-114 | Added on Tuesday, September 22, 2020 9:24:04 AM
 8 | 
 9 | This is test highlight 2.
10 | ==========
11 | Title 2 Is Good Too (Bryar, Colin)
12 | - Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM
13 | 
14 | This is test highlight 3.
15 | ==========
16 | Title 2 Is Good Too (Bryar, Colin)
17 | - Your Highlight on page 34 | Location 682-684 | Added on Friday, April 30, 2021 3:14:33 PM
18 | 
19 | This is test highlight 4.
20 | ==========
21 | Title 3 Is Clean (Robert C. Martin Series) (C., Martin Robert)
22 | - Your Highlight on page 22 | Location 559-560 | Added on Saturday, May 15, 2021 10:25:42 PM
23 | 
24 | This is test highlight 5.
25 | ==========
26 | Title 3 Is Clean (Robert C. Martin Series) (C., Martin Robert)
27 | - Your Highlight on page 22 | Location 564-565 | Added on Saturday, May 15, 2021 10:26:26 PM
28 | 
29 | This is test highlight 6.
30 | ==========


--------------------------------------------------------------------------------
/tests/test_exporting.py:
--------------------------------------------------------------------------------
 1 | from kindle2notion.exporting import _prepare_aggregated_text_for_one_book
 2 | 
 3 | 
 4 | def test_prepare_aggregated_text_for_one_book_should_return_the_aggregated_text_when_highlight_date_is_disabled():
 5 |     # Given
 6 |     highlights = [
 7 |         (
 8 |             "This is an example highlight.",
 9 |             "1",
10 |             "100",
11 |             "Thursday, 29 April 2021 12:31:29 AM",
12 |             False,
13 |         ),
14 |         (
15 |             "This is a second example highlight.",
16 |             "2",
17 |             "200",
18 |             "Friday, 30 April 2021 12:31:29 AM",
19 |             True,
20 |         ),
21 |     ]
22 | 
23 |     expected = (
24 |         [
25 |             "This is an example highlight.\n* Page: 1, Location: 100\n\n",
26 |             "> NOTE: \nThis is a second example highlight.\n* Page: 2, Location: 200\n\n",
27 |         ],
28 |         "Friday, 30 April 2021 12:31:29 AM",
29 |     )
30 | 
31 |     # When
32 |     actual = _prepare_aggregated_text_for_one_book(
33 |         highlights, enable_highlight_date=False
34 |     )
35 |     print(actual)
36 |     # Then
37 |     assert expected == actual
38 | 
39 | 
def test_prepare_aggregated_text_for_one_book_should_return_the_aggregated_text_when_highlight_date_is_enabled():
    """With dates enabled, each aggregated entry also carries its "Date Added" value."""
    # Given
    plain_highlight = (
        "This is an example highlight.",
        "1",
        "100",
        "Thursday, 29 April 2021 12:31:29 AM",
        False,
    )
    note_highlight = (
        "This is a second example highlight.",
        "2",
        "200",
        "Friday, 30 April 2021 12:31:29 AM",
        True,
    )
    highlights = [plain_highlight, note_highlight]

    expected_texts = [
        "This is an example highlight.\n* Page: 1, Location: 100, Date Added: Thursday, 29 April 2021 12:31:29 AM\n\n",
        "> NOTE: \nThis is a second example highlight.\n* Page: 2, Location: 200, Date Added: Friday, 30 April 2021 12:31:29 AM\n\n",
    ]
    expected = (expected_texts, "Friday, 30 April 2021 12:31:29 AM")

    # When
    actual = _prepare_aggregated_text_for_one_book(
        highlights, enable_highlight_date=True
    )
    print(actual)

    # Then
    assert expected == actual
74 | 


--------------------------------------------------------------------------------
/tests/test_parsing.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from pathlib import Path
  3 | 
  4 | from kindle2notion.parsing import (
  5 |     parse_raw_clippings_text,
  6 |     _parse_author_and_title,
  7 |     _parse_page_location_date_and_note,
  8 |     _add_parsed_items_to_all_books_dict,
  9 | )
 10 | from kindle2notion.reading import read_raw_clippings
 11 | 
 12 | 
 13 | def test_parse_raw_clippings_text_should_return_a_dict_with_all_the_parsed_information():
 14 |     # Given
 15 |     test_clippings_file_path = (
 16 |         Path(__file__).parent.absolute() / "test_data/Test Clippings.txt"
 17 |     )
 18 |     raw_clippings_text = read_raw_clippings(test_clippings_file_path)
 19 | 
 20 |     expected = {
 21 |         "Title 1: A Great Book": {
 22 |             "author": "Ben Horowitz",
 23 |             "highlights": [
 24 |                 (
 25 |                     "This is test highlight 1.",
 26 |                     "11",
 27 |                     "111-114",
 28 |                     "Tuesday, 22 September 2020 09:23:48 AM",
 29 |                     False,
 30 |                 ),
 31 |                 (
 32 |                     "This is test highlight 2.",
 33 |                     "11",
 34 |                     "111-114",
 35 |                     "Tuesday, 22 September 2020 09:24:04 AM",
 36 |                     False,
 37 |                 ),
 38 |             ],
 39 |         },
 40 |         "Title 2 Is Good Too": {
 41 |             "author": "Colin Bryar",
 42 |             "highlights": [
 43 |                 (
 44 |                     "This is test highlight 3.",
 45 |                     "3",
 46 |                     "184-185",
 47 |                     "Friday, 30 April 2021 12:31:29 AM",
 48 |                     False,
 49 |                 ),
 50 |                 (
 51 |                     "This is test highlight 4.",
 52 |                     "34",
 53 |                     "682-684",
 54 |                     "Friday, 30 April 2021 03:14:33 PM",
 55 |                     False,
 56 |                 ),
 57 |             ],
 58 |         },
 59 |         "Title 3 Is Clean (Robert C. Martin Series)": {
 60 |             "author": "Martin Robert C.",
 61 |             "highlights": [
 62 |                 (
 63 |                     "This is test highlight 5.",
 64 |                     "22",
 65 |                     "559-560",
 66 |                     "Saturday, 15 May 2021 10:25:42 PM",
 67 |                     False,
 68 |                 ),
 69 |                 (
 70 |                     "This is test highlight 6.",
 71 |                     "22",
 72 |                     "564-565",
 73 |                     "Saturday, 15 May 2021 10:26:26 PM",
 74 |                     False,
 75 |                 ),
 76 |             ],
 77 |         },
 78 |     }
 79 | 
 80 |     # When
 81 |     actual = parse_raw_clippings_text(raw_clippings_text)
 82 | 
 83 |     # Then
 84 |     assert expected == actual
 85 | 
 86 | 
 87 | def test_parse_author_and_title_case_should_parse_the_author_and_title_when_the_author_name_is_formatted_with_a_comma():
 88 |     # Given
 89 |     raw_clipping_list = [
 90 |         "Relativity (Einstein, Albert)",
 91 |         "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM",
 92 |         "",
 93 |         "This is a test highlight.",
 94 |         False,
 95 |     ]
 96 |     expected = ("Albert Einstein", "Relativity")
 97 | 
 98 |     # When
 99 |     actual = _parse_author_and_title(raw_clipping_list)
100 | 
101 |     # Then
102 |     assert expected == actual
103 | 
104 | 
def test_parse_author_and_title_case_should_parse_the_author_and_title_when_the_author_name_is_first_name_last_name():
    """An author already written as "Albert Einstein" is returned unchanged."""
    # Given
    title_line = "Relativity (Albert Einstein)"
    metadata_line = "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM"
    raw_clipping_list = [
        title_line,
        metadata_line,
        "",
        "This is a test highlight.",
        False,
    ]

    # When
    actual = _parse_author_and_title(raw_clipping_list)

    # Then
    expected = ("Albert Einstein", "Relativity")
    assert expected == actual
121 | 
122 | 
def test_parse_author_and_title_case_should_parse_the_author_and_title_when_there_are_parentheses_in_the_author_name():
    """Parentheses nested inside the author name are kept in the returned author."""
    # Given
    title_line = "Candide (Voltaire (François-Marie Arouet))"
    metadata_line = "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM"
    raw_clipping_list = [
        title_line,
        metadata_line,
        "",
        "This is a test highlight.",
        False,
    ]

    # When
    actual = _parse_author_and_title(raw_clipping_list)

    # Then
    expected = ("Voltaire (François-Marie Arouet)", "Candide")
    assert expected == actual
139 | 
140 | 
def test_parse_author_and_title_case_should_parse_the_author_and_title_when_there_is_a_The_at_the_end_of_the_title():
    """A title ending in ", The" is returned with "The" moved to the front."""
    # Given
    title_line = "Age of Louis XIV, The (Voltaire (François-Marie Arouet))"
    metadata_line = "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM"
    raw_clipping_list = [
        title_line,
        metadata_line,
        "",
        "This is a test highlight.",
        False,
    ]

    # When
    actual = _parse_author_and_title(raw_clipping_list)

    # Then
    expected = ("Voltaire (François-Marie Arouet)", "The Age of Louis XIV")
    assert expected == actual
157 | 
158 | 
def test_parse_author_and_title_case_should_parse_the_author_and_title_when_there_are_parentheses_in_the_title():
    """A parenthesised part of the title stays in the title; only the last group is the author."""
    # Given
    title_line = "The Mysterious Disappearance of Leon (I Mean Noel) (Ellen Raskin)"
    metadata_line = "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM"
    raw_clipping_list = [
        title_line,
        metadata_line,
        "",
        "This is a test highlight.",
        False,
    ]

    # When
    actual = _parse_author_and_title(raw_clipping_list)

    # Then
    expected = ("Ellen Raskin", "The Mysterious Disappearance of Leon (I Mean Noel)")
    assert expected == actual
175 | 
176 | 
def test_parse_page_location_date_and_note_should_parse_the_page_location_and_date_when_there_are_all_three():
    """Page, location and reformatted date are all returned for a complete metadata line."""
    # Given
    title_line = "Relativity (Albert Einstein)"
    metadata_line = "- Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM"
    raw_clipping_list = [
        title_line,
        metadata_line,
        "",
        "This is a test highlight.",
        False,
    ]

    # When
    actual = _parse_page_location_date_and_note(raw_clipping_list)

    # Then
    expected = ("3", "184-185", "Friday, 30 April 2021 12:31:29 AM", False)
    assert expected == actual
193 | 
194 | 
def test_parse_page_location_date_and_note_should_parse_the_page_and_location_when_there_is_no_date():
    """The date comes back as an empty string when the metadata line has no "Added on" part."""
    # Given
    title_line = "Relativity (Albert Einstein)"
    metadata_line = "- Your Highlight on page 3 | Location 184-185"
    raw_clipping_list = [
        title_line,
        metadata_line,
        "",
        "This is a test highlight.",
        False,
    ]

    # When
    actual = _parse_page_location_date_and_note(raw_clipping_list)

    # Then
    expected = ("3", "184-185", "", False)
    assert expected == actual
211 | 
212 | 
def test_parse_page_location_date_and_note_should_parse_the_location_and_date_when_there_is_no_page():
    """The page comes back as an empty string when the metadata line has no page part."""
    # Given
    title_line = "Relativity (Albert Einstein)"
    metadata_line = "Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM"
    raw_clipping_list = [
        title_line,
        metadata_line,
        "",
        "This is a test highlight.",
        False,
    ]

    # When
    actual = _parse_page_location_date_and_note(raw_clipping_list)

    # Then
    expected = ("", "184-185", "Friday, 30 April 2021 12:31:29 AM", False)
    assert expected == actual
229 | 
230 | 
def test_parse_page_location_date_and_note_should_parse_the_page_and_date_when_there_is_no_location():
    """The location comes back as an empty string when the metadata line has no location part."""
    # Given
    title_line = "Relativity (Albert Einstein)"
    metadata_line = "- Your Highlight on page 3 | Added on Friday, April 30, 2021 12:31:29 AM"
    raw_clipping_list = [
        title_line,
        metadata_line,
        "",
        "This is a test highlight.",
    ]

    # When
    actual = _parse_page_location_date_and_note(raw_clipping_list)

    # Then
    expected = ("3", "", "Friday, 30 April 2021 12:31:29 AM", False)
    assert expected == actual
246 | 
247 | 
def test_add_parsed_items_to_books_dict_should_add_the_parsed_items_when_the_book_is_not_already_in_the_books_dict():
    """Adding a clipping for an unknown title creates the book entry with that one highlight."""
    # Given
    books = {}
    title, author = "Relativity", "Albert Einstein"
    highlight = "This is a first highlight."
    page, location = "1", "100"
    date = datetime(2021, 4, 30, 0, 31, 29)
    is_note = False

    only_highlight = (
        "This is a first highlight.",
        "1",
        "100",
        datetime(2021, 4, 30, 0, 31, 29),
        False,
    )
    expected = {
        "Relativity": {
            "author": "Albert Einstein",
            "highlights": [only_highlight],
        }
    }

    # When
    actual = _add_parsed_items_to_all_books_dict(
        books, title, author, highlight, page, location, date, is_note
    )

    # Then
    assert expected == actual
281 | 
282 | 
def test_add_parsed_items_to_books_dict_should_add_the_parsed_items_when_the_book_is_already_in_the_books_dict():
    """Adding a clipping for a known title appends the highlight to the existing entry."""
    # Given
    books = {
        "Relativity": {
            "author": "Albert Einstein",
            "highlights": [
                (
                    "This is a first highlight.",
                    "1",
                    "100",
                    datetime(2021, 4, 30, 0, 31, 29),
                    False,
                )
            ],
        }
    }
    title, author = "Relativity", "Albert Einstein"
    highlight = "This is a second highlight."
    page, location = "2", "200"
    date = datetime(2021, 5, 1, 0, 31, 29)
    is_note = False

    first_highlight = (
        "This is a first highlight.",
        "1",
        "100",
        datetime(2021, 4, 30, 0, 31, 29),
        False,
    )
    second_highlight = (
        "This is a second highlight.",
        "2",
        "200",
        datetime(2021, 5, 1, 0, 31, 29),
        False,
    )
    expected = {
        "Relativity": {
            "author": "Albert Einstein",
            "highlights": [first_highlight, second_highlight],
        }
    }

    # When
    actual = _add_parsed_items_to_all_books_dict(
        books, title, author, highlight, page, location, date, is_note
    )

    # Then
    assert expected == actual
336 | 


--------------------------------------------------------------------------------
/tests/test_reading.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from kindle2notion.reading import read_raw_clippings
 4 | 
 5 | 
 6 | def test_read_raw_clippings_should_return_all_clippings_data_as_string():
 7 |     # Given
 8 |     test_clippings_file_path = (
 9 |         Path(__file__).parent.absolute() / "test_data/Test Clippings.txt"
10 |     )
11 | 
12 |     expected = """Title 1: A Great Book (Horowitz, Ben)
13 | - Your Highlight on page 11 | Location 111-114 | Added on Tuesday, September 22, 2020 9:23:48 AM
14 | 
15 | This is test highlight 1.
16 | ==========
17 | Title 1: A Great Book (Horowitz, Ben)
18 | - Your Highlight on page 11 | Location 111-114 | Added on Tuesday, September 22, 2020 9:24:04 AM
19 | 
20 | This is test highlight 2.
21 | ==========
22 | Title 2 Is Good Too (Bryar, Colin)
23 | - Your Highlight on page 3 | Location 184-185 | Added on Friday, April 30, 2021 12:31:29 AM
24 | 
25 | This is test highlight 3.
26 | ==========
27 | Title 2 Is Good Too (Bryar, Colin)
28 | - Your Highlight on page 34 | Location 682-684 | Added on Friday, April 30, 2021 3:14:33 PM
29 | 
30 | This is test highlight 4.
31 | ==========
32 | Title 3 Is Clean (Robert C. Martin Series) (C., Martin Robert)
33 | - Your Highlight on page 22 | Location 559-560 | Added on Saturday, May 15, 2021 10:25:42 PM
34 | 
35 | This is test highlight 5.
36 | ==========
37 | Title 3 Is Clean (Robert C. Martin Series) (C., Martin Robert)
38 | - Your Highlight on page 22 | Location 564-565 | Added on Saturday, May 15, 2021 10:26:26 PM
39 | 
40 | This is test highlight 6.
41 | =========="""
42 | 
43 |     # When
44 |     actual = raw_clippings_text = read_raw_clippings(test_clippings_file_path)
45 | 
46 |     # Then
47 |     assert expected == actual
48 | 


--------------------------------------------------------------------------------