├── .coveragerc ├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── LICENSE ├── README.md ├── README_zh.md ├── config.json ├── demos ├── notion_templage.png ├── yinxiang_clipper.html ├── yinxiang_clipper.json ├── yinxiang_clipper.resources │ ├── svg_1.svg │ ├── svg_10.svg │ ├── svg_11.svg │ ├── svg_12.svg │ ├── svg_13.svg │ ├── svg_2.svg │ ├── svg_3.svg │ ├── svg_4.svg │ ├── svg_5.svg │ ├── svg_6.svg │ ├── svg_7.svg │ ├── svg_8.svg │ └── svg_9.svg ├── yinxiang_clipper_2.html ├── yinxiang_clipper_2.resources │ ├── svg_1.svg │ ├── svg_10.svg │ ├── svg_11.svg │ ├── svg_12.svg │ ├── svg_13.svg │ ├── svg_14.svg │ ├── svg_15.svg │ ├── svg_16.svg │ ├── svg_17.svg │ ├── svg_18.svg │ ├── svg_19.svg │ ├── svg_2.svg │ ├── svg_20.svg │ ├── svg_21.svg │ ├── svg_22.svg │ ├── svg_23.svg │ ├── svg_24.svg │ ├── svg_25.svg │ ├── svg_26.svg │ ├── svg_27.svg │ ├── svg_3.svg │ ├── svg_4.svg │ ├── svg_5.svg │ ├── svg_6.svg │ ├── svg_7.svg │ ├── svg_8.svg │ └── svg_9.svg ├── yinxiang_clipper_wx.html ├── yinxiang_clipper_wx.json ├── yinxiang_gbk.html ├── yinxiang_markdown.html ├── yinxiang_markdown.json ├── yinxiang_markdown.resources │ └── 5BB98FD9-8FA4-481F-AF4E-E3B1F2DD38BC.png ├── yinxiang_mobile.html ├── yinxiang_normal.html ├── yinxiang_normal.resources │ └── 7672861D-5C56-4A07-B0E6-256950F2775A.png ├── yinxiang_normal_format.html ├── yinxiang_notion.png ├── yinxiang_notion2.png └── yinxiang_supernote.html ├── examples ├── insert_divider.ipynb ├── insert_table.ipynb ├── insert_text.ipynb ├── insert_todo.ipynb ├── parse_code.ipynb ├── parse_tag.ipynb └── process_md.ipynb ├── html2notion ├── __init__.py ├── main.py ├── translate │ ├── __init__.py │ ├── batch_import.py │ ├── cos_uploader.py │ ├── html2json.py │ ├── html2json_base.py │ ├── html2json_clipper.py │ ├── html2json_default.py │ ├── html2json_markdown.py │ ├── html2json_yinxiang.py │ ├── import_stats.py │ ├── notion_export.py │ └── notion_import.py └── utils │ ├── __init__.py │ ├── load_config.py │ ├── log.py 
│ ├── timeutil.py │ └── url_process.py ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── conftest.py ├── test_batchimport.py ├── test_config.py ├── test_cosupload.py ├── test_demos.py ├── test_log.py ├── test_notionexport.py ├── test_reqlimit.py ├── test_util.py └── test_yinxiang.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | */__init__.py 4 | main.py 5 | 6 | [report] 7 | exclude_lines = 8 | if __name__ == .__main__.: 9 | async def main(.*): 10 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "master" ] 9 | pull_request: 10 | branches: [ "master" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.8", "3.9", "3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 pytest pytest-asyncio pytest-cov 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | pip install -e . 33 | - name: Lint with flake8 34 | run: | 35 | # stop the build if there are Python syntax errors or undefined names 36 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 37 | # exit-zero treats all errors as warnings. 
The GitHub editor is 127 chars wide 38 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 39 | - name: Test with pytest 40 | run: | 41 | pytest --cov=html2notion --cov-config=.coveragerc 42 | env: 43 | notion_api_key: ${{ secrets.NOTION_API_KEY }} 44 | notion_db_id_1: ${{ secrets.NOTION_DATABASE_ID_1 }} 45 | notion_page_id_1: ${{ secrets.NOTION_PAGE_ID_1 }} 46 | cos_secret_id: ${{ secrets.COS_SECRET_ID }} 47 | cos_secret_key: ${{ secrets.COS_SECRET_KEY }} 48 | cos_region: ${{ secrets.COS_REGION }} 49 | cos_bucket: ${{ secrets.COS_BUCKET }} 50 | - name: Upload coverage reports to Codecov 51 | uses: codecov/codecov-action@v3 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | *.pyc 3 | notion_demo/* 4 | build/ 5 | html2notion.egg-info/ 6 | logs/* 7 | .config.json 8 | .DS_Store 9 | dist/* 10 | .coverage 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) SelfBoot 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
Row 1: Role, Read, Normal | Row 1: Bold Text. Undownline | Row 1: Link here: https://platform.openai.com/docs/guides/chat/introduction | Row 1: In classification problems:
Code here; |
Row 2: System | Row 2:You are a helpful assistant. | Row 2: none |
12 | 师徒们正行赏间,又见一山挡路。唐僧道:“徒弟们仔细,前遇山高,恐有虎狼阻挡。”行者道:“师父,出家人莫说在家话。你记得那乌巢和尚的《心经》云心无挂碍,无挂碍,方无恐怖,远离颠倒梦想之言?但只是扫除心上垢,洗净耳边尘。不受苦中苦,难为人上人。你莫生忧虑,但有老孙,就是塌下天来,可保无事。怕什么虎狼!”
13 | |
14 |
25 | 扫地恐伤蝼蚁命,爱惜飞蛾纱罩灯。
26 | |
27 |
35 | 早还是山野中无人查考,若到城市,倘有人一时冲撞了你,你也行凶,执着棍子,乱打伤人,我可做得白客,怎能脱身?
36 | |
37 |
92 | 那只虎蹲着身,伏在尘埃,动也不敢动动。却被他照头一棒,就打的脑浆迸万点桃红,牙齿喷几珠玉块,唬得那陈玄奘滚鞍落马,咬指道声:“天哪,天哪!刘太保前日打的斑斓虎,还与他斗了半日。今日孙悟空不用争持,把这虎一棒打得稀烂,正是强中更有强中手!”
93 | |
94 |
102 | 行者去解开包袱,在那包裹中间见有几个粗面烧饼,拿出来递与师父。又见那光艳艳的一领绵布直裰,一顶嵌金花帽,行者道:“这衣帽是东土带来的?”三藏就顺口儿答应道:“是我小时穿戴的。这帽子若戴了,不用教经,就会念经;这衣服若穿了,不用演礼,就会行礼。”行者道:“好师父,把与我穿戴了罢。”三藏道:“只怕长短不一,你若穿得,就穿了罢。”行者遂脱下旧白布直裰,将绵布直裰穿上,也就是比量着身体裁的一般,把帽儿戴上。三藏见他戴上帽子,就不吃干粮,却默默的念那紧箍咒一遍。行者叫道:“头痛,头痛!”那师父不住的又念了几遍,把个行者痛得打滚,抓破了嵌金的花帽。
103 | |
104 |
112 | 行者道:“贤弟,你起来。不是我去不成,既是妖精敢骂我,我就不能不降他,我和你去。老孙五百年前大闹天宫,普天的神将看见我,一个个控背躬身,口口称呼大圣。这妖怪无礼,他敢背前面后骂我!我这去,把他拿住,碎尸万段,以报骂我之仇!报毕,我即回来。”八戒道:“哥哥,正是,你只去拿了妖精,报了你仇,那时来与不来,任从尊意。” 那猴才跳下崖,撞入洞里,脱了妖衣,整一整锦直裰,束一束虎皮裙,执了铁棒,径出门来。慌得那群猴拦住道:“大圣爷爷,你往那里去?带挈我们耍子几年也好。”行者道:“小的们,你说那里话!我保唐僧的这桩事,天上地下,都晓得孙悟空是唐僧的徒弟。他倒不是赶我回来,倒是教我来家看看,送我来家自在耍子。如今只因这件事,你们却都要仔细看守家业,依时插柳栽松,毋得废坠,待我还去保唐僧,取经回东土。功成之后,仍回来与你们共乐天真。”众猴各各领命。
113 | |
114 |
122 | 长老现了原身,定性睁睛,才认得是行者,一把搀住道:“悟空!你从那里来也?”沙僧侍立左右,把那请行者降妖精,救公主,解虎气,并回朝上项事,备陈了一遍。三藏谢之不尽道:“贤徒,亏了你也,亏了你也!这一去,早诣西方,径回东土,奏唐王,你的功劳第一。”行者笑道:“莫说莫说!但不念那话儿,足感爱厚之情也。”国王闻此言,又劝谢了他四众,整治素筵,大开东阁。他师徒受了皇恩,辞王西去。
123 | |
124 |
132 | 行者闻言,把功曹叱退,切切在心,按云头,径来山上。只见长老与八戒、沙僧,簇拥前进,他却暗想:“我若把功曹的言语实实告诵师父,师父他不济事,必就哭了;假若不与他实说,梦着头,带着他走,常言道乍入芦圩,不知深浅。倘或被妖魔捞去,却不又要老孙费心?且等我照顾八戒一照顾,先着他出头与那怪打一仗看。若是打得过他,就算他一功;若是没手段,被怪拿去,等老孙再去救他不迟,却好显我本事出名。”
133 | |
134 |
142 | 呆子真个对行者说道:“哥哥,你教我做甚事?”行者道:“第一件是看师父,第二件是去巡山。”八戒道:“看师父是坐,巡山去是走。终不然教我坐一会又走,走一会又坐,两处怎么顾盼得来?”行者道:“不是教你两件齐干,只是领了一件便罢。”八戒又笑道:“这等也好计较。但不知看师父是怎样,巡山是怎样,你先与我讲讲,等我依个相应些儿的去干罢。”行者道:“看师父啊,师父去出恭,你伺候;师父要走路,你扶持;师父要吃斋,你化斋。若他饿了些儿,你该打;黄了些儿脸皮,你该打;瘦了些儿形骸,你该打。”
143 | |
144 |
The OpenAI API can be applied to virtually any task that involves understanding or generating natural language, code, or images. We offer a spectrum of models with different levels of power suitable for different tasks, as well as the ability to fine-tune
your own custom models. These models can be used for everything from content generation to semantic search and classification.
\n\n###\n\n
.Remember to also append this separator when you eventually make requests to your model.Row 1: Role, Read, Normal | Row 1: Bold Text. Undownline | Row 1: Link here: https://platform.openai.com/docs/guides/chat/introduction | Row 1: In classification problems:
Code here; |
Row 2: System | Row 2:You are a helpful assistant. | Row 2: none |
Animal | Names | Column | Column2 |
Cat | Captain | pading | agagin |
Dog | Ruff the Protector | null |
\n",
33 | " 1\n",
34 | "2\n",
35 | "3\n",
36 | "4\n",
37 | "5\n",
38 | "6\n",
39 | "7\n",
40 | "8\n",
41 | "9\n",
42 | "10\n",
43 | "11\n",
44 | "12\n",
45 | "
\n",
46 | "# Note: you need to be using OpenAI Python v0.27.0 for the code below to workimport openai\n",
48 | "\n",
49 | "openai.ChatCompletion.create(\n",
50 | " model=\"gpt-3.5-turbo\",\n",
51 | " messages=[\n",
52 | " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
53 | " {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"},\n",
54 | " {\"role\": \"assistant\", \"content\": \"The Los Angeles Dodgers won the World Series in 2020.\"},\n",
55 | " {\"role\": \"user\", \"content\": \"Where was it played?\"}\n",
56 | " ]\n",
57 | ")
\n",
58 | "\"\"\"\n",
59 | "soup = BeautifulSoup(html_doc, 'html.parser')\n",
60 | "\n",
61 | "# 找到所有的标签\n", 62 | "pre_tags = soup.find_all('pre')\n", 63 | "\n", 64 | "for pre in pre_tags:\n", 65 | " # 在每个标签中找到标签\n", 66 | " code_tags = pre.find_all('code')\n", 67 | " \n", 68 | " for code in code_tags:\n", 69 | " # 检查
标签是否包含行号,这里假设行号是在标签中的数字\n", 70 | " span_tags = code.find_all('span')\n", 71 | " \n", 72 | " for span in span_tags:\n", 73 | " if span.string and span.string.strip().isdigit():\n", 74 | " # 如果是行号,则删除这个标签\n", 75 | " span.decompose()\n", 76 | "\n", 77 | "# 这时,soup中的HTML已经没有行号了\n", 78 | "print(soup.prettify())\n" 79 | ] 80 | } 81 | ], 82 | "metadata": { 83 | "kernelspec": { 84 | "display_name": "notion", 85 | "language": "python", 86 | "name": "python3" 87 | }, 88 | "language_info": { 89 | "codemirror_mode": { 90 | "name": "ipython", 91 | "version": 3 92 | }, 93 | "file_extension": ".py", 94 | "mimetype": "text/x-python", 95 | "name": "python", 96 | "nbconvert_exporter": "python", 97 | "pygments_lexer": "ipython3", 98 | "version": "3.11.2" 99 | }, 100 | "orig_nbformat": 4 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 2 104 | } 105 | -------------------------------------------------------------------------------- /examples/parse_tag.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from bs4 import BeautifulSoup, NavigableString\n", 10 | "\n", 11 | "html = '''\n", 12 | "
\n", 13 | "\n", 36 | "'''\n", 37 | "\n", 38 | "def extract_text_and_parents(tag, parents=[]):\n", 39 | " results = []\n", 40 | " for child in tag.children:\n", 41 | " if isinstance(child, NavigableString):\n", 42 | " if child.strip():\n", 43 | " text = child.strip()\n", 44 | " parent_tags = [{\"name\": p.name, \"attrs\": p.attrs} for p in parents + [tag]]\n", 45 | " results.append({\"text\": text, \"parent_tags\": parent_tags})\n", 46 | " else:\n", 47 | " results.extend(extract_text_and_parents(child, parents + [tag]))\n", 48 | " return results\n", 49 | "\n", 50 | "soup = BeautifulSoup(html, 'html.parser')\n", 51 | "td_tags = soup.find_all('td')\n", 52 | "\n", 53 | "for i, td in enumerate(td_tags, 1):\n", 54 | " text_with_parents = extract_text_and_parents(td)\n", 55 | " print(f\"Text and parent tags in TD {i}:\")\n", 56 | " for item in text_with_parents:\n", 57 | " print(f\"Text: {item['text']}\")\n", 58 | " print(\"Parent tags:\")\n", 59 | " for parent in item[\"parent_tags\"]:\n", 60 | " print(f\" Tag: {parent['name']}, Attributes: {parent['attrs']}\")\n", 61 | " print()\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "from bs4 import BeautifulSoup\n", 71 | "from html2notion.translate.html2json_base import Html2JsonBase\n", 72 | "content = \"\"\"\n", 14 | "\n", 15 | " \n", 16 | "
\n", 34 | "\n", 17 | " \n", 27 | "\n", 18 | " \n", 20 | "Row 1: You are a helpful assistant. Remember it.\n", 19 | "\n", 21 | " \n", 22 | " \n", 23 | "\n", 24 | " \n", 26 | "Row 1: Import Content Read more.\n", 25 | "\n", 28 | " \n", 32 | " \n", 33 | "\n", 29 | " Row 2:\n", 30 | " Row 2:\n", 31 | " Row 2:\n", 35 | "web image:
\n", 73 | "\"\"\"\n", 74 | "tag = BeautifulSoup(content, 'html.parser').find('p')\n", 75 | "text_and_parents = Html2JsonBase.extract_text_and_parents(tag)\n", 76 | "for item in text_and_parents:\n", 77 | " print(f\"Text: {item[0]}, {item[1]}\")" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "notion", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.11.2" 98 | }, 99 | "orig_nbformat": 4 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /examples/process_md.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "md_text = \"\"\"\n", 10 | "# Header\n", 11 | "\n", 12 | "**bold**, _ite_, ~~other~~, more...\n", 13 | "`inline code` here.\n", 14 | "\n", 15 | "```python\n", 16 | "import os\n", 17 | "os.print('hello')\n", 18 | "```\n", 19 | "\n", 20 | "> Please work through this document in its entirety to better understand how OpenAI’s rate limit system works. We include code examples and possible solutions to handle common issues. 
It is recommended to **follow** this guidance before filling out the [Rate Limit Increase Request form](https://docs.google.com/forms/d/e/1FAIpQLSc6gSL3zfHFlL6gNIyUcjkEv29jModHGxg5_XGyr-PrE2LaHw/viewform) with details regarding how to fill it out in the last section.\n", 21 | "\n", 22 | "divider\n", 23 | "* * *\n", 24 | "\n", 25 | "### image\n", 26 | "local images:\n", 27 | "\n", 28 | "\n", 29 | "\n", 30 | "web image:\n", 31 | "\n", 32 | "\n", 33 | "[link](https://docs.microsoft.com/zh-tw/previous-versions/visualstudio/design-tools/expression-studio-2/cc294571(v=expression.10))\n", 34 | "\n", 35 | "### Table\n", 36 | "\n", 37 | "|header| column1 | column 2\n", 38 | "|-|-|-\n", 39 | "|row 1| row 1_1 | row 1_2\n", 40 | "|row 2| row 2_2 **bold**, _ite_, ~~other~~, more... | row 2_3\n", 41 | "\n", 42 | "### list\n", 43 | "\n", 44 | "[Why do we have rate limits?](https://platform.openai.com/docs/guides/rate-limits/overview)\n", 45 | "Rate limits are a common practice for APIs, and they're put in place for a few different reasons:\n", 46 | "\n", 47 | "- They help protect against abuse or misuse of the API. For example, a malicious actor could flood the API with requests in an attempt to overload it or cause disruptions in service. By setting rate limits, `OpenAI` can prevent this kind of activity.\n", 48 | "- Rate limits help ensure that everyone has fair access to the API. If one person or organization makes an excessive number of requests, it could bog down the API for everyone else. By throttling the number of requests that a single user can make, OpenAI ensures that the most number of people have an opportunity to use the API without experiencing slowdowns.\n", 49 | "- Rate limits can help OpenAI manage the aggregate load on its infrastructure. If requests to the API increase dramatically, it could tax the servers and cause performance issues. 
By setting rate limits, OpenAI can help maintain a smooth and consistent experience for all users.\n", 50 | "\n", 51 | "number list\n", 52 | "\n", 53 | "1. number list1\n", 54 | "2. numner list2\n", 55 | "\n", 56 | "## checkbox\n", 57 | "\n", 58 | "Three frogs\n", 59 | "* [x] The first frog\n", 60 | "* [ ] The second frog\n", 61 | "* [ ] The third frog\n", 62 | "\n", 63 | "# math and grapth\n", 64 | "\n", 65 | "Here is math\n", 66 | "```math\n", 67 | "e^{i\\pi} + 1 = 0\n", 68 | "```\n", 69 | "\n", 70 | "mermaid grapth:\n", 71 | "\n", 72 | "```mermaid\n", 73 | "graph TD\n", 74 | "A[Module A] -->|A1| B( Module B)\n", 75 | "B --> C{Confidition C}\n", 76 | "C -->|condition C1| D[Module D]\n", 77 | "C -->|condition C2| E[Module E]\n", 78 | "C -->|condition C3| F[Module F]\n", 79 | "```\n", 80 | "\n", 81 | "sequenceDiagram\n", 82 | "\n", 83 | "```mermaid\n", 84 | "sequenceDiagram\n", 85 | "A->>B: Have you received a message?\n", 86 | "B-->>A: Message received\n", 87 | "```\n", 88 | "\n", 89 | "gantt\n", 90 | "\n", 91 | "```mermaid\n", 92 | "gantt\n", 93 | "title Gantt chart\n", 94 | "dateFormat YYYY-MM-DD\n", 95 | "section Proj A\n", 96 | "Task 1 :a1, 2018-06-06, 30d\n", 97 | "Task 2 :after a1 , 20d\n", 98 | "section Proj B\n", 99 | "Task 3 :2018-06-12 , 12d\n", 100 | "Task 4 : 24d\n", 101 | "```\n", 102 | "\n", 103 | "### chart\n", 104 | "\n", 105 | "```chart\n", 106 | ", budget, income, expenses, debt\n", 107 | "June,5000,8000,4000,6000\n", 108 | "July,3000,1000,4000,3000\n", 109 | "Aug,5000,7000,6000,3000\n", 110 | "Sep,7000,2000,3000,1000\n", 111 | "Oct,6000,5000,4000,2000\n", 112 | "Nov,4000,3000,5000,\n", 113 | "\n", 114 | "type: pie\n", 115 | "title: 每月收益\n", 116 | "x.title: Amount\n", 117 | "y.title: Month\n", 118 | "y.suffix: $\n", 119 | "```\n", 120 | "\n", 121 | "```chart\n", 122 | ",Budget,Income,Expenses,Debt\n", 123 | "June,5000,8000,4000,6000\n", 124 | "July,3000,1000,4000,3000\n", 125 | "Aug,5000,7000,6000,3000\n", 126 | "Sep,7000,2000,3000,1000\n", 127 | 
"Oct,6000,5000,4000,2000\n", 128 | "Nov,4000,3000,5000,\n", 129 | "\n", 130 | "type: line\n", 131 | "title: Monthly Revenue\n", 132 | "x.title: Amount\n", 133 | "y.title: Month\n", 134 | "y.suffix: $\n", 135 | "```\n", 136 | "\"\"\"" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "import re\n", 146 | "\n", 147 | "def extract_code_blocks(md_text):\n", 148 | " code_pattern = re.compile(r'```(\\w+)?\\n(.*?)```', re.DOTALL)\n", 149 | " matches = code_pattern.findall(md_text)\n", 150 | " code_blocks = [{'language': match[0], 'code': match[1]} for match in matches]\n", 151 | " return code_blocks\n", 152 | "\n", 153 | "\n", 154 | "code_blocks = extract_code_blocks(md_text)\n", 155 | "\n", 156 | "for block in code_blocks:\n", 157 | " print(f\"Language: {block['language']}\")\n", 158 | " print(f\"Code: {block['code']}\\n\")\n" 159 | ] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "notion", 165 | "language": "python", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.11.2" 179 | }, 180 | "orig_nbformat": 4 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 2 184 | } 185 | -------------------------------------------------------------------------------- /html2notion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selfboot/html2notion/2f02e7a465fcdb5e3a80631f7d29fcaefb195339/html2notion/__init__.py -------------------------------------------------------------------------------- /html2notion/main.py: -------------------------------------------------------------------------------- 1 | import 
argparse 2 | import os 3 | import sys 4 | import json 5 | import asyncio 6 | from pathlib import Path 7 | from aiohttp import ClientSession 8 | from notion_client import AsyncClient 9 | from rich.console import Console 10 | from rich.table import Table 11 | from rich.text import Text 12 | from rich import box 13 | from .utils import setup_logger, read_config, logger, config 14 | from .translate.notion_import import NotionImporter 15 | from .translate.batch_import import BatchImport 16 | from .translate.import_stats import StatLevel 17 | console = Console() 18 | 19 | 20 | def prepare_args(): 21 | parser = argparse.ArgumentParser( 22 | description='Html2notion: Save HTML to your Notion notes quickly and easily, while keeping the original format as much as possible') 23 | parser.add_argument('--conf', type=str, help='conf file path', required=True) 24 | parser.add_argument('--log', type=str, help='log directory path') 25 | parser.add_argument('--batch', type=int, default=15, help='batch save concurrent limit') 26 | 27 | group = parser.add_mutually_exclusive_group(required=True) 28 | group.add_argument('--file', type=str, help='Save single html file to notion') 29 | group.add_argument('--dir', type=str, help='Save all html files in the dir to notion') 30 | return parser 31 | 32 | 33 | def print_single_stats(stat): 34 | if stat.get_level() == StatLevel.EXCEPTION.value: 35 | text = Text(f"Failed to import {stat.filename}", style="default") 36 | text.append(f"\nException: {stat.exception}", style="red") 37 | if 'body.parent.page_id should be defined' in str(stat.exception): 38 | text.append(f"\nHeadmeta : \n{json.dumps(stat.head_meta, indent=4)}", style="yellow") 39 | console.print(text) 40 | return 41 | 42 | title = f"{stat.filename}" if stat.filename else "Import Result (Loss filename)" 43 | style = "default" 44 | if stat.get_level() == StatLevel.LOSS.value: 45 | title += " (Loss some content)" 46 | style = "yellow" 47 | elif stat.get_level() == StatLevel.SUCC.value: 48 
| title += "(Import successfully)" 49 | style = "green" 50 | 51 | table = Table(title=title, title_style=style, expand=True, box=box.HEAVY_HEAD, show_lines=True) 52 | table.add_column("Item", justify="right", style="default") 53 | table.add_column("Html", style="default") 54 | table.add_column("Notion", justify="left", style="default") 55 | table.add_row("Text Len", str(stat.text_count), str(stat.notion_text_count)) 56 | table.add_row("Image Count", str(stat.image_count), str(stat.notion_image_count)) 57 | if stat.skip_tag: 58 | table.add_row("Skip Tag Count", "", 'Detail: [yellow]' + ";".join([repr(s) 59 | for s in stat.skip_tag])[:2000] + "[/yellow]") 60 | 61 | console.print(table) 62 | 63 | 64 | def print_batch_stats(batch_import): 65 | all_files = batch_import.all_files 66 | batch_stats = batch_import.batch_stats 67 | success_stats = [stat for stat in batch_stats if not stat.get_level() == StatLevel.SUCC.value] 68 | if len(success_stats) == len(all_files): 69 | console.print(f"All files migrated successfully and there is no data loss.", style="green") 70 | 71 | failed_stats = [stat for stat in batch_stats if stat.get_level() == StatLevel.EXCEPTION.value] 72 | if failed_stats: 73 | table = Table(title=f"\nImport Fail Exception Detail\nLog path: {config.get('log_path')}", expand=True, box=box.HEAVY_HEAD, show_lines=True) 74 | table.add_column("File Name", justify="left", style="default") 75 | table.add_column("Fail Reason", justify="left", style="default") 76 | 77 | for stat in failed_stats: 78 | table.add_row(str(stat.filename), str(stat)) 79 | console.print(table) 80 | 81 | less_stats = [stat for stat in batch_stats if stat.get_level() == StatLevel.LOSS.value] 82 | if less_stats: 83 | table = Table(title=f"\nImport Data Loss Detail (You can use --file to import single file for more info)\n", expand=True, box=box.HEAVY_HEAD, show_lines=True) 84 | table.add_column("File Name", justify="left", style="default") 85 | table.add_column("Loss Detail", justify="left", 
style="default") 86 | 87 | for stat in less_stats: 88 | table.add_row(str(stat.filename), str(stat)) 89 | console.print(table) 90 | 91 | 92 | 93 | def prepare_env(args: argparse.Namespace): 94 | log_path = Path(args.log) if args.log else Path.cwd() / 'logs/' 95 | if not log_path.is_dir(): 96 | log_path.mkdir(parents=True) 97 | 98 | conf_path = Path(args.conf) 99 | if not conf_path.is_file(): 100 | text = Text(f"Read conf {conf_path} failed.", style="red") 101 | console.print(text) 102 | sys.exit(1) 103 | 104 | setup_logger(log_path) 105 | read_config(conf_path) 106 | logger.info(f"Read log {log_path}, conf {conf_path}") 107 | 108 | 109 | async def import_single_file(file): 110 | notion_api_key = "" 111 | if 'GITHUB_ACTIONS' in os.environ: 112 | notion_api_key = os.environ['notion_api_key'] 113 | else: 114 | notion_api_key = config['notion']['api_key'] 115 | async with ClientSession() as session: 116 | async with AsyncClient(auth=notion_api_key) as notion_client: 117 | notion_importer = NotionImporter(session, notion_client) 118 | await notion_importer.process_file(file) 119 | return notion_importer.import_stats 120 | 121 | 122 | def main(): 123 | arg_parse = prepare_args() 124 | args = arg_parse.parse_args() 125 | prepare_env(args) 126 | 127 | text = Text("") 128 | file_path = Path(args.file) if args.file else None 129 | dir_path = Path(args.dir) if args.dir else None 130 | max_concurrency = args.batch 131 | if file_path and file_path.is_file(): 132 | stats = asyncio.run(import_single_file(file_path)) 133 | print_single_stats(stats) 134 | elif dir_path and dir_path.is_dir(): 135 | logger.info(f"Begin save all html files in the dir: {dir_path}.") 136 | batch_import = BatchImport(dir_path, max_concurrency) 137 | result = asyncio.run(batch_import.process_directory()) 138 | logger.info(f"Finish save all html files in the dir: {dir_path}.\n{result}") 139 | print_batch_stats(batch_import) 140 | else: 141 | text.append("The parameters provided are incorrect, please 
check.", style="red") 142 | text.append(f"\n{arg_parse.format_help()}", style="default") 143 | 144 | text.append("\nIf you need help, please submit an ", style="default") 145 | link = Text("issue", style="cyan underline link https://github.com/selfboot/html2notion/issues") 146 | text.append(link) 147 | text.append(" on gitHub.", style="default") 148 | console.print(text) 149 | return 150 | 151 | 152 | if __name__ == '__main__': 153 | main() 154 | -------------------------------------------------------------------------------- /html2notion/translate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selfboot/html2notion/2f02e7a465fcdb5e3a80631f7d29fcaefb195339/html2notion/translate/__init__.py -------------------------------------------------------------------------------- /html2notion/translate/batch_import.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import aiohttp 3 | import os 4 | from pathlib import Path 5 | from asyncio import Lock 6 | from notion_client import AsyncClient 7 | from rich.progress import ( 8 | BarColumn, 9 | MofNCompleteColumn, 10 | Progress, 11 | TextColumn, 12 | TimeRemainingColumn, 13 | ) 14 | from ..translate.notion_import import NotionImporter 15 | from ..utils import logger, config 16 | 17 | 18 | class BatchImport: 19 | def __init__(self, directory: Path, concurrent_limit: int = 10): 20 | self.directory = directory 21 | self.concurrent_limit = concurrent_limit 22 | if 'GITHUB_ACTIONS' in os.environ: 23 | self.notion_api_key = os.environ['notion_api_key'] 24 | else: 25 | self.notion_api_key = config['notion']['api_key'] 26 | self.notion_client = AsyncClient(auth=self.notion_api_key) 27 | self.batch_stats = [] 28 | self.files_lock = Lock() 29 | 30 | @staticmethod 31 | async def process_file(session, notion_client, file_path, files_lock, batch_stats): 32 | logger.info(f"Begin file, file {file_path}") 33 | 
notion_import = NotionImporter(session, notion_client) 34 | response = await notion_import.process_file(file_path) 35 | logger.info(f"Finish file {file_path}, status {str(notion_import.import_stats)}") 36 | async with files_lock: 37 | batch_stats.append(notion_import.import_stats) 38 | return response 39 | 40 | async def process_directory(self): 41 | semaphore = asyncio.Semaphore(self.concurrent_limit) 42 | self.all_files = [file_path for file_path in self.directory.glob('*.html') if file_path.name != 'index.html'] 43 | files_len = len(self.all_files) 44 | 45 | with Progress( 46 | TextColumn("[progress.description]{task.description}", justify="right"), 47 | BarColumn(), 48 | MofNCompleteColumn(), 49 | TextColumn(" "), 50 | TimeRemainingColumn() 51 | ) as progress: 52 | # with Progress() as progress: 53 | progress.add_task("[cyan]Total", total=files_len, 54 | completed=files_len, update_period=0, style="cyan") 55 | success_task_id = progress.add_task( 56 | "[green]Success", total=files_len, style="green") 57 | failed_task_id = progress.add_task("[red]Failed", total=files_len, style="red") 58 | async def process_file_with_semaphore(session, notion_client, file_path): 59 | async with semaphore: 60 | result = await self.process_file(session, notion_client, file_path, self.files_lock, self.batch_stats) 61 | if result == "succ": 62 | progress.update(success_task_id, advance=1) 63 | else: 64 | progress.update(failed_task_id, advance=1) 65 | return result 66 | 67 | async with aiohttp.ClientSession() as session: 68 | tasks = [process_file_with_semaphore(session, self.notion_client, file_path) for file_path in self.all_files] 69 | results = await asyncio.gather(*tasks) 70 | await session.close() 71 | return results 72 | 73 | 74 | if __name__ == '__main__': 75 | from ..utils import test_prepare_conf 76 | test_prepare_conf() 77 | from tempfile import TemporaryDirectory 78 | with TemporaryDirectory() as temp_dir: 79 | temp_dir_path = Path(temp_dir) 80 | files = [] 81 | for i in 
# -------------------- html2notion/translate/cos_uploader.py --------------------
import asyncio
from qcloud_cos import CosConfig
from qcloud_cos import CosS3Client
from qcloud_cos.cos_exception import CosClientError
from functools import partial
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from ..utils import logger, test_prepare_conf, config


class TencentCosUploaderAsync:
    """Async facade over the blocking Tencent COS SDK.

    Each operation is shipped to the default thread-pool executor via
    ``loop.run_in_executor`` so coroutines never block the event loop, and is
    retried up to 5 times with exponential backoff on ``CosClientError``.
    """

    def __init__(self, secret_id, secret_key, region, bucket, timeout=60):
        self.config = CosConfig(Region=region, SecretId=secret_id, SecretKey=secret_key, Timeout=timeout)
        self.client = CosS3Client(self.config)
        self.bucket = bucket

    @retry(stop=stop_after_attempt(5),
           wait=wait_exponential(multiplier=1, min=3, max=30),
           retry=retry_if_exception_type(CosClientError))
    async def upload_file(self, loop, local_path, key):
        """Upload ``local_path`` to the bucket under ``key``; returns the SDK response.

        NOTE: the whole file is read into memory before upload, which is fine
        for note attachments but not for very large files.
        """
        with open(local_path, 'rb') as f:
            content = f.read()

        put_object_partial = partial(self.client.put_object, Bucket=self.bucket, Body=content, Key=key)
        response = await loop.run_in_executor(None, put_object_partial)
        return response

    @retry(stop=stop_after_attempt(5),
           wait=wait_exponential(multiplier=1, min=3, max=30),
           retry=retry_if_exception_type(CosClientError))
    async def check_file_exist(self, loop, key):
        """Return True if ``key`` exists in the bucket; False on non-COS errors."""
        try:
            return await loop.run_in_executor(None, self.client.object_exists, self.bucket, key)
        except CosClientError:
            # BUG FIX: the original caught bare Exception here, which also
            # swallowed CosClientError -- so the tenacity retry above could
            # never fire. Re-raise it so transient client errors are retried.
            raise
        except Exception as e:
            logger.error(e)
            return False

    @retry(stop=stop_after_attempt(5),
           wait=wait_exponential(multiplier=1, min=3, max=30),
           retry=retry_if_exception_type(CosClientError))
    async def delete_file(self, loop, key):
        """Delete ``key`` from the bucket; returns the SDK response."""
        response = await loop.run_in_executor(None, self.client.delete_object, self.bucket, key)
        return response


async def main():
    """Manual smoke test: upload a demo file and verify it exists in the bucket."""
    test_prepare_conf()

    try:
        secret_id = config["cos"]["secret_id"]
        secret_key = config["cos"]["secret_key"]
        region = config["cos"]["region"]
        bucket = config["cos"]["bucket"]
    except Exception:
        print("Please fill cos conf in the config file")
        return

    local_path = './demos/saul.webp'
    key = 'test/saul.webp'

    uploader = TencentCosUploaderAsync(secret_id, secret_key, region, bucket)
    # BUG FIX: asyncio.get_event_loop() inside a coroutine is deprecated
    # (3.10+); get_running_loop() is the correct call here.
    loop = asyncio.get_running_loop()

    upload_response = await uploader.upload_file(loop, local_path, key)
    print(f"Upload response: {upload_response}")

    if await uploader.check_file_exist(loop, key):
        print("Upload successful!")
    else:
        print("Upload failed!")


if __name__ == "__main__":
    asyncio.run(main())


# -------------------- html2notion/translate/html2json.py --------------------
import json
import chardet
import time
from functools import singledispatch
from pathlib import Path
from bs4 import BeautifulSoup, Tag
from ..utils import logger, test_prepare_conf
from ..translate.html2json_base import Html2JsonBase
from ..translate.html2json_default import Default_Type
from ..translate.html2json_yinxiang import YinXiang_Type
from ..translate.html2json_clipper import YinXiangClipper_Type
from ..translate.html2json_markdown import YinXiangMarkdown_Type
# Detection is driven by the <meta> tags Evernote/Yinxiang writes into <head>:
# "exporter-version", "source", "source-application" and "content-class".


def _is_yinxiang_export_html(html_soup, import_stat):
    """True if <meta name="source"> marks a Yinxiang desktop/mobile export."""
    meta_source = html_soup.select_one('html > head > meta[name="source"]')
    meta_source_content = meta_source.get('content', "") if isinstance(meta_source, Tag) else ""
    if not meta_source_content:
        return False
    yinxiang_source_content = ["yinxiang", "desktop", "mobile"]
    import_stat.head_meta["source"] = meta_source_content
    for prefix in yinxiang_source_content:
        if isinstance(meta_source_content, str) and meta_source_content.startswith(prefix):
            return True
    return False


def _is_yinxiang_clipper_html(html_soup, import_stat):
    """True if <meta name="source-application"> marks a web-clipper capture."""
    meta_source_application = html_soup.select_one('html > head > meta[name="source-application"]')
    source_application = meta_source_application.get('content', "") if isinstance(meta_source_application, Tag) else ""
    if not source_application:
        return False
    import_stat.head_meta["source-application"] = source_application
    if isinstance(source_application, str) and source_application.endswith("evernote"):
        return True
    if isinstance(source_application, str) and source_application in ["微信", ]:
        return True
    return False


def _is_yinxiang_markdown_html(html_soup, import_stat):
    """True if <meta name="content-class"> marks a markdown-sourced note."""
    meta_content_class = html_soup.select_one('html > head > meta[name="content-class"]')
    content_class = meta_content_class.get('content', "") if isinstance(meta_content_class, Tag) else ""
    if not content_class:
        return False
    import_stat.head_meta["content_class"] = content_class
    if isinstance(content_class, str) and content_class.endswith("markdown"):
        return True
    return False


def _infer_input_type(html_content, import_stat):
    """Classify the HTML as markdown/clipper/export Yinxiang or the default type."""
    soup = BeautifulSoup(html_content, 'html.parser')
    exporter_version_meta = soup.select_one('html > head > meta[name="exporter-version"]')
    exporter_version_content = exporter_version_meta.get('content', "") if isinstance(
        exporter_version_meta, Tag) else ""
    import_stat.head_meta["exporter-version"] = exporter_version_content
    exporter_version = exporter_version_content if isinstance(exporter_version_content, str) else ""
    if exporter_version.startswith("Evernote") or exporter_version.startswith("YXBJ"):
        # Most specific first: markdown beats clipper beats plain export.
        if _is_yinxiang_markdown_html(soup, import_stat):
            return YinXiangMarkdown_Type
        if _is_yinxiang_clipper_html(soup, import_stat):
            return YinXiangClipper_Type
        if _is_yinxiang_export_html(soup, import_stat):
            return YinXiang_Type
        return YinXiang_Type  # default for any Evernote/YXBJ export
    return Default_Type


def _get_converter(html_content, import_stat):
    """Instantiate the registered Html2JsonBase subclass for this content."""
    html_type = _infer_input_type(html_content, import_stat)
    import_stat.head_meta["parse_type"] = html_type
    logger.info(f"Input type: {html_type}")
    converter = Html2JsonBase.create(html_type, html_content, import_stat)
    return converter


@singledispatch
def html2json_process(html_content, import_stat):
    raise TypeError(f"Unsupported {type(html_content)}, {import_stat}")


@html2json_process.register
def _(html_content: str, import_stat):
    # String input: convert directly.
    converter = _get_converter(html_content, import_stat)
    result = converter.process()
    return converter.get_notion_data(), result


@html2json_process.register
def _(html_file: Path, import_stat):
    # Path input: sniff the encoding with chardet, then convert the decoded text.
    if not html_file.is_file():
        print(f"Load file: {html_file.resolve()} failed")
        raise FileNotFoundError

    html_content = ""
    with html_file.open('rb') as f:
        data = f.read()
        result = chardet.detect(data)
        encoding = result['encoding'] if result['encoding'] else 'utf-8'
        html_content = data.decode(encoding)

    if html_content == "main_hold":  # just for local debug
        time.sleep(1)
        return "main_hold"

    converter = _get_converter(html_content, import_stat)
    result = converter.process()
    return converter.get_notion_data(), result


if __name__ == "__main__":
    test_prepare_conf()

    class _DebugImportStat:
        """Minimal stand-in for the real import-stat object; records nothing.

        TODO(review): swap in the real class from translate.import_stats once
        its constructor is confirmed.
        """

        def __init__(self):
            self.head_meta = {}

        def __getattr__(self, name):
            # Any add_* / counter hook becomes a no-op.
            return lambda *args, **kwargs: None

    html_file = Path("./demos/Test Case D.html")
    # BUG FIX: the original called html2json_process(html_file) without the
    # required import_stat argument, which raised a TypeError.
    result, html_type = html2json_process(html_file, _DebugImportStat())
    print(html_type)
    print(json.dumps(result, indent=4, ensure_ascii=False))
    # NOTE(review): the inline HTML literal was stripped by the export dump;
    # reconstructed as a simple paragraph -- confirm against VCS.
    result2, html_type2 = html2json_process("<p>test</p>", _DebugImportStat())
    print(html_type2)
    print(json.dumps(result2, indent=4, ensure_ascii=False))
# -------------------- html2notion/translate/html2json_base.py --------------------
import re
import os
import copy
from collections import namedtuple
from bs4 import NavigableString, Tag, PageElement
from enum import Enum
from ..utils import logger, config, is_valid_url


class Block(Enum):
    """Notion block types the converters can emit; FAIL marks an element with
    no matching converter."""
    FAIL = "fail"
    PARAGRAPH = "paragraph"
    QUOTE = "quote"
    NUMBERED_LIST = "numbered_list_item"
    BULLETED_LIST = "bulleted_list_item"
    HEADING = "heading"
    CODE = "code"
    DIVIDER = "divider"
    TABLE = "table"
    TO_DO = "to_do"
    EQUATION = "equation"


class Html2JsonBase:
    """Base converter from exported HTML to a Notion create-page payload.

    Subclasses register themselves via ``Html2JsonBase.register`` and implement
    ``process``; the shared helpers here build rich-text/link/image objects and
    map inline CSS to Notion annotations.
    """

    # https://developers.notion.com/reference/request-limits
    URL_MAX_LENGTH = 2000
    TEXT_MAX_LENGTH = 2000
    EXPRESSION_MAX_LENGTH = 1000
    RICHTEXT_ARRAY_LENGTH = 100

    # input_type string -> converter subclass (filled by register()).
    _registry = {}
    # Allowed annotation keys and the type each value must have.
    _text_annotations = {
        "bold": bool,
        "italic": bool,
        "strikethrough": bool,
        "underline": bool,
        "code": bool,
        "color": str,
    }

    # Languages Notion accepts for code blocks.
    _language = {"abap", "agda", "arduino",
                 "assembly", "bash", "basic", "bnf", "c", "c#", "c++", "clojure", "coffeescript", "coq", "css",
                 "dart", "dhall", "diff", "docker", "ebnf", "elixir", "elm", "erlang", "f#", "flow", "fortran",
                 "gherkin", "glsl", "go", "graphql", "groovy", "haskell", "html", "idris", "java", "javascript",
                 "json", "julia", "kotlin", "latex", "less", "lisp", "livescript", "llvm ir", "lua", "makefile",
                 "markdown", "markup", "matlab", "mathematica", "mermaid", "nix", "objective-c", "ocaml", "pascal",
                 "perl", "php", "plain text", "powershell", "prolog", "protobuf", "purescript", "python", "r",
                 "racket", "reason", "ruby", "rust", "sass", "scala", "scheme", "scss", "shell", "solidity", "sql",
                 "swift", "toml", "typescript", "vb.net", "verilog", "vhdl", "visual basic", "webassembly", "xml",
                 "yaml", "java/c/c++/c#"}

    _color_tuple = namedtuple("Color", "name r g b")
    # Reference RGB values for Notion's named colors; arbitrary CSS colors are
    # snapped to the nearest of these by _closest_color.
    _notion_color = [
        _color_tuple("default", 0, 0, 0),
        _color_tuple("gray", 128, 128, 128),
        _color_tuple("brown", 165, 42, 42),
        _color_tuple("orange", 255, 165, 0),
        _color_tuple("yellow", 255, 255, 0),
        _color_tuple("green", 0, 128, 0),
        _color_tuple("blue", 0, 0, 255),
        _color_tuple("purple", 128, 0, 128),
        _color_tuple("pink", 255, 192, 203),
        _color_tuple("red", 255, 0, 0),
    ]

    # Page content should be: https://developers.notion.com/reference/post-page
    def __init__(self, html_content, import_stat):
        self.html_content = html_content
        self.children = []
        self.properties = {}
        self.parent = {}
        self.import_stat = import_stat
        if 'GITHUB_ACTIONS' in os.environ:
            # CI runs against a dedicated test database.
            notion_database_id = os.environ['notion_db_id_1']
        else:
            notion_database_id = config['notion']['database_id']
        self.parent = {"type": "database_id", "database_id": notion_database_id}

    def process(self):
        raise NotImplementedError("Subclasses must implement this method")

    def get_notion_data(self):
        """Return the page payload, omitting any empty sections."""
        return {
            key: value
            for key, value in {
                'children': self.children,
                'properties': self.properties,
                'parent': self.parent,
            }.items()
            if value
        }

    @staticmethod
    def extract_text_and_parents(tag: PageElement, parents=None):
        """Flatten ``tag`` into (text, ancestor-tags) pairs.

        Images yield their ``src`` with the <img> tag appended to the ancestry;
        <br> yields a line-break marker with no ancestry.
        """
        # BUG FIX: the mutable default `parents=[]` is the classic shared-state
        # trap; a None sentinel is safe (the list was never mutated, but the
        # idiom invites bugs).
        if parents is None:
            parents = []
        results = []
        # Filter empty content when tag is not img
        if isinstance(tag, NavigableString) and tag.strip():
            results.append((tag, parents))
            return results
        elif isinstance(tag, Tag):
            if tag.name == 'img':
                img_src = tag.get('src', '')
                parent_tags = [p for p in parents + [tag]]
                results.append((img_src, parent_tags))
            else:
                for child in tag.children:
                    if isinstance(child, NavigableString):
                        # NOTE: the original also tested tag.name != 'img' here,
                        # which is always true in this branch -- dropped.
                        if child.strip():
                            text = child.text
                            parent_tags = [p for p in parents + [tag]]
                            results.append((text, parent_tags))
                    elif isinstance(child, Tag) and child.name == 'br':
                        # NOTE(review): marker literal reconstructed as '<br>'
                        # (the dump stripped it); generate_inline_obj matches it.
                        results.append(('<br>', []))
                    else:
                        results.extend(Html2JsonBase.extract_text_and_parents(child, parents + [tag]))
        return results

    @staticmethod
    def parse_one_style(tag_soup: Tag, text_params: dict):
        """Fold one ancestor tag's styling into ``text_params`` (mutated in place)."""
        tag_name = tag_soup.name.lower()
        styles = Html2JsonBase.get_tag_style(tag_soup)
        if Html2JsonBase.is_bold(tag_name, styles):
            text_params["bold"] = True
        if Html2JsonBase.is_italic(tag_name, styles):
            text_params["italic"] = True
        if Html2JsonBase.is_strikethrough(tag_name, styles):
            text_params["strikethrough"] = True
        if Html2JsonBase.is_underline(tag_name, styles):
            text_params["underline"] = True
        if Html2JsonBase.is_code(tag_name, styles):
            text_params["code"] = True

        color = Html2JsonBase.get_color(styles, tag_soup.attrs if tag_name else {})
        if color != 'default':
            text_params["color"] = color

        if tag_name == 'a':
            href = tag_soup.get('href', "")
            if not href:
                logger.warning("Link href is empty")
            text_params["url"] = href
        elif tag_name == 'img':
            src = tag_soup.get('src', "")
            # only support external image here.
            if not src:
                logger.warning("Image src is empty")
            text_params["src"] = src
        return
": 162 | try: 163 | res_obj[-1]["text"]["content"] += "\n" 164 | res_obj[-1]["plain_text"] += "\n" 165 | except Exception as e: 166 | pass 167 | continue 168 | 169 | link_url = text_params.get("url", "") 170 | text_obj = {} 171 | if text_params.get("url", "") and is_valid_url(link_url): 172 | text_obj = self.generate_link(**text_params) 173 | # Here image is a independent block, split out in the outer layer 174 | elif text_params.get("src", ""): 175 | text_obj = self.generate_image(**text_params) 176 | else: 177 | if len(text) <= self.TEXT_MAX_LENGTH: 178 | text_obj = self.generate_text(**text_params) 179 | else: 180 | for chunk in [text[i:i+self.TEXT_MAX_LENGTH] for i in range(0, len(text), self.TEXT_MAX_LENGTH)]: 181 | text_params["plain_text"] = chunk 182 | text_obj = self.generate_text(**text_params) 183 | if text_obj: 184 | res_obj.append(text_obj) 185 | text_obj = None 186 | if text_obj: 187 | res_obj.append(text_obj) 188 | return res_obj 189 | 190 | def generate_link(self, **kwargs): 191 | link_url = kwargs.get("url", "") 192 | plain_text = kwargs.get("plain_text", "") 193 | if not plain_text or not is_valid_url(link_url): 194 | return 195 | 196 | link_url = link_url[:self.URL_MAX_LENGTH] 197 | self.import_stat.add_notion_text(plain_text) 198 | return { 199 | "href": link_url, 200 | "plain_text": plain_text, 201 | "text": { 202 | "link": {"url": link_url}, 203 | "content": plain_text 204 | }, 205 | "type": "text" 206 | } 207 | 208 | def generate_image(self, **kwargs): 209 | source = kwargs.get("src", "") 210 | if not source or not is_valid_url(source): 211 | return 212 | self.import_stat.add_notion_image(source) 213 | image_block = { 214 | "object": "block", 215 | "type": "image", 216 | "image": { 217 | "type": "external", 218 | "external": { 219 | "url": source 220 | } 221 | } 222 | } 223 | return image_block 224 | 225 | def generate_text(self, **kwargs): 226 | plain_text = kwargs.get("plain_text", "") 227 | if not plain_text: 228 | return 229 | annotations 
= { 230 | key: value 231 | for key, value in kwargs.items() 232 | if key in Html2JsonBase._text_annotations and isinstance(value, Html2JsonBase._text_annotations[key]) 233 | } 234 | stats_count = kwargs.get("stats_count", True) 235 | if stats_count: 236 | self.import_stat.add_notion_text(plain_text) 237 | text_obj = { 238 | "plain_text": plain_text, 239 | "text": {"content": plain_text}, 240 | "type": "text" 241 | } 242 | if annotations: 243 | text_obj["annotations"] = annotations 244 | 245 | return text_obj 246 | 247 | def generate_properties(self, **kwargs): 248 | title = kwargs.get("title", "") 249 | url = kwargs.get("url", "") 250 | tags = kwargs.get("tags", []) 251 | created_time = kwargs.get("created_time", "") 252 | 253 | property_map = { 254 | "Title": {"title": [{"text": {"content": title}}]} if title else None, 255 | "URL": {"url": url, "type": "url"} if url else None, 256 | "Tags": {"type": "multi_select", "multi_select": [{"name": tag} for tag in tags]} if tags else None, 257 | "Created": {"date": {"start": created_time}, "type": "date"} if created_time else None, 258 | } 259 | 260 | properties_obj = {key: value for key, value in property_map.items() if value is not None} 261 | 262 | logger.debug(f"properties: {properties_obj}") 263 | return properties_obj 264 | 265 | @staticmethod 266 | def is_same_annotations_text(text_one: dict, text_another: dict): 267 | if text_one["type"] != "text" or text_another["type"] != "text": 268 | return False 269 | attributes = ["annotations", "href"] 270 | 271 | # When merging, be careful not to let the text length exceed the limit 272 | total_size = len(text_one["text"]["content"]) + len(text_another["text"]["content"]) 273 | if total_size > Html2JsonBase.TEXT_MAX_LENGTH: 274 | return False 275 | 276 | return all(text_one.get(attr) == text_another.get(attr) for attr in attributes) 277 | 278 | @staticmethod 279 | def merge_rich_text(rich_text: list): 280 | if not rich_text: 281 | return [] 282 | merged_text = [] 283 | 
current_text = rich_text[0] 284 | for text in rich_text[1:]: 285 | if Html2JsonBase.is_same_annotations_text(current_text, text): 286 | text_content = current_text["text"]["content"] + text["text"]["content"] 287 | current_text["plain_text"] = text_content 288 | current_text["text"]["content"] = text_content 289 | else: 290 | merged_text.append(current_text) 291 | current_text = text 292 | if current_text: 293 | merged_text.append(current_text) 294 | 295 | return merged_text 296 | 297 | @staticmethod 298 | def is_bold(tag_name: str, styles: dict) -> bool: 299 | if tag_name in ('b', 'strong'): 300 | return True 301 | 302 | font_weight = styles.get('font-weight', None) 303 | if font_weight is None: 304 | return False 305 | elif font_weight == 'bold': 306 | return True 307 | elif font_weight.isdigit() and int(font_weight) >= 700: 308 | return True 309 | return False 310 | 311 | @staticmethod 312 | def is_strikethrough(tag_name: str, styles: dict) -> bool: 313 | if tag_name in ('s', 'strike', 'del'): 314 | return True 315 | text_decoration = styles.get("text-decoration", "") 316 | return "line-through" in text_decoration 317 | 318 | @staticmethod 319 | def is_italic(tag_name: str, styles: dict) -> bool: 320 | if tag_name in ('i', 'em'): 321 | return True 322 | font_style = styles.get('font-style', "") 323 | return "italic" in font_style 324 | 325 | @staticmethod 326 | def is_underline(tag_name: str, styles: dict) -> bool: 327 | # A tuple of a single element requires a comma after the element 328 | if tag_name in ('u',): 329 | return True 330 | text_decoration = styles.get('text-decoration', "") 331 | return 'underline' in text_decoration 332 | 333 | @staticmethod 334 | def is_code(tag_name: str, styles: dict): 335 | if tag_name in ('code',): 336 | return True 337 | 338 | # style="-en-code: true" 339 | if styles.get('-en-code', "false") == "true": 340 | return True 341 | 342 | # Check if the font-family is monospace 343 | font_family = styles.get('font-family', "") 344 
| monospace_fonts = {'courier', 'monospace'} 345 | if not font_family: 346 | return False 347 | for font in monospace_fonts: 348 | if font.lower() == font_family.lower(): 349 | return True 350 | 351 | @staticmethod 352 | def _closest_color(r, g, b): 353 | closest_distance = float("inf") 354 | closest_color = None 355 | 356 | for color in Html2JsonBase._notion_color: 357 | distance = ((r - color.r) ** 2 + (g - color.g) ** 2 + (b - color.b) ** 2) ** 0.5 358 | if distance < closest_distance: 359 | closest_distance = distance 360 | closest_color = color.name 361 | 362 | return closest_color 363 | 364 | @staticmethod 365 | def _hex_to_rgb(hex_color): 366 | hex_color = hex_color.lstrip("#") 367 | return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4)) 368 | 369 | @staticmethod 370 | def get_color(styles: dict, attrs): 371 | color = styles.get('color', "") 372 | if not color and 'color' in attrs: 373 | color = attrs['color'] 374 | if not color: 375 | return "default" 376 | # If the color_values have 4 items, then it is RGBA and the last value is alpha 377 | # rgba(174, 174, 188, 0.2) 378 | if color.startswith("rgb"): 379 | color_values = [int(x.strip()) for x in re.findall(r'\d+', color)] 380 | if len(color_values) >= 3: 381 | r, g, b = color_values[:3] 382 | return Html2JsonBase._closest_color(r, g, b) 383 | # Check if color is in hexadecimal format 384 | elif re.match(r'^#(?:[0-9a-fA-F]{3}){1,2}$', color): 385 | if len(color) == 4: # Short form like #abc -> #aabbcc 386 | color = '#' + ''.join([c*2 for c in color[1:]]) 387 | r, g, b = Html2JsonBase._hex_to_rgb(color) 388 | return Html2JsonBase._closest_color(r, g, b) 389 | 390 | return "default" 391 | 392 | def convert_paragraph(self, soup): 393 | json_obj = { 394 | "object": "block", 395 | "type": "paragraph", 396 | "paragraph": { 397 | "rich_text": [] 398 | } 399 | } 400 | rich_text = json_obj["paragraph"]["rich_text"] 401 | text_obj = self.generate_inline_obj(soup) 402 | if text_obj: 403 | 
rich_text.extend(text_obj) 404 | 405 | # Split out image into a independent blocks 406 | split_objs = Html2JsonBase.split_image_src(json_obj) 407 | return Html2JsonBase.ensure_array_len(split_objs) 408 | 409 | def convert_divider(self, soup): 410 | return { 411 | "object": "block", 412 | "type": "divider", 413 | "divider": {} 414 | } 415 | 416 | def convert_heading(self, soup): 417 | heading_map = {"h1": "heading_1", "h2": "heading_2", "h3": "heading_3", 418 | "h4": "heading_3", "h5": "heading_3", "h6": "heading_3"} 419 | 420 | heading_level = heading_map.get(soup.name, "heading_3") 421 | json_obj = { 422 | "object": "block", 423 | "type": heading_level, 424 | heading_level: { 425 | "rich_text": [] 426 | } 427 | } 428 | rich_text = json_obj[heading_level]["rich_text"] 429 | text_obj = self.generate_inline_obj(soup) 430 | if text_obj: 431 | rich_text.extend(text_obj) 432 | return json_obj 433 | return None 434 | 435 | #436 | def convert_numbered_list_item(self, soup): 437 | return self.convert_list_items(soup, 'numbered_list_item') 438 | 439 | #
first second third440 | def convert_bulleted_list_item(self, soup): 441 | return self.convert_list_items(soup, 'bulleted_list_item') 442 | 443 | def convert_list_items(self, soup, list_type): 444 | # Remove heading tags in li 445 | for heading in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']): 446 | heading.unwrap() 447 | 448 | items = soup.find_all('li', recursive=True) 449 | if not items: 450 | logger.warning("No list items found in {soup}") 451 | 452 | json_arr = [] 453 | for item in items: 454 | one_item = self._convert_one_list_item(item, list_type) 455 | if one_item: 456 | json_arr.append(one_item) 457 | else: 458 | logger.info(f'empty {item}') 459 | return json_arr 460 | 461 | def _convert_one_list_item(self, soup, list_type): 462 | if list_type not in {'numbered_list_item', 'bulleted_list_item'}: 463 | logger.warning(f'Not support list_type') 464 | 465 | json_obj = { 466 | "object": "block", 467 | list_type: { 468 | "rich_text": [] 469 | }, 470 | "type": list_type, 471 | } 472 | rich_text = json_obj[list_type]["rich_text"] 473 | text_obj = self.generate_inline_obj(soup) 474 | if text_obj: 475 | rich_text.extend(text_obj) 476 | 477 | return json_obj 478 | 479 | """ 480 |
    """
    Example input (reconstructed -- the dump stripped the HTML):
        <table>
          <tr><td>itemA</td><td>itemB</td><td>itemC</td></tr>
        </table>
    """
    # ../examples/insert_table.ipynb
    def convert_table(self, soup):
        # Build a Notion table block: one table_row child per <tr>, cells from
        # <td> (or <th>, which also marks the table as having a column header).
        table_rows = []
        tr_tags = soup.find_all('tr')
        if not tr_tags:
            logger.error(f"No tr found in {soup}")
            return

        # Start from the first row's <td> count; widened below if later rows
        # (or header rows) have more cells.
        table_width = len(tr_tags[0].find_all('td'))
        has_header = False
        for tr in tr_tags:
            td_tags = tr.find_all('td')
            if not td_tags:
                # A row with no <td> is assumed to be a header row of <th>.
                td_tags = tr.find_all('th')
                has_header = True
            table_width = max(table_width, len(td_tags))
            one_row = {
                "type": "table_row",
                "table_row": {
                    "cells": []
                }
            }
            for td in td_tags:
                col = self.generate_inline_obj(td)
                one_row["table_row"]["cells"].append(col)
            table_rows.append(one_row)

        table_obj = {
            "table": {
                "has_row_header": False,
                "has_column_header": has_header,
                "table_width": table_width,
                "children": table_rows,
            }
        }
        return table_obj

    @staticmethod
    def split_image_src(text_obj):
        # Notion cannot embed images inside a paragraph's rich text, so any
        # image block generate_inline_obj produced is hoisted out, splitting
        # the surrounding text into separate paragraph blocks around it.
        rich_text = text_obj["paragraph"]["rich_text"]
        need_split = any(text.get("object") == "block" for text in rich_text)
        if not need_split:
            return [text_obj]

        split_obj = []
        # Accumulator for the current run of plain text objects; deep-copied
        # on flush because its rich_text list is cleared and reused.
        cur_obj = {
            "object": "block",
            "type": "paragraph",
            "paragraph": {
                "rich_text": []
            }
        }
        for text in rich_text:
            if text.get("object") == "block":
                if len(cur_obj["paragraph"]["rich_text"]) > 0:
                    split_obj.append(copy.deepcopy(cur_obj))
                    cur_obj["paragraph"]["rich_text"].clear()
                split_obj.append(text)
                continue
            cur_obj["paragraph"]["rich_text"].append(text)
        if len(cur_obj["paragraph"]["rich_text"]) > 0:
            split_obj.append(cur_obj)
        return split_obj
551 | # Can't use this way like: background-image: url('data:image/png;base64...') 552 | @staticmethod 553 | def get_tag_style(tag_soup): 554 | styles = {} 555 | if not isinstance(tag_soup, Tag): 556 | return styles 557 | style = tag_soup.get('style', "") 558 | if str and isinstance(style, str): 559 | # style = ''.join(style.split()) 560 | styles = { 561 | rule.split(':')[0].strip(): rule.split(':')[1].strip().lower() 562 | for rule in style.split(';') 563 | if rule and len(rule.split(':')) > 1 564 | } 565 | return styles 566 | 567 | @staticmethod 568 | def get_valid_language(language): 569 | if language in Html2JsonBase._language: 570 | return language 571 | return "plain text" 572 | 573 | @staticmethod 574 | def ensure_array_len(blocks): 575 | final_objs = [] 576 | for obj in blocks: 577 | if "paragraph" not in obj or "rich_text" not in obj["paragraph"] or len( 578 | obj["paragraph"]["rich_text"]) <= Html2JsonBase.RICHTEXT_ARRAY_LENGTH: 579 | final_objs.append(obj) 580 | continue 581 | 582 | # If the length of rich_text is greater than RICHTEXT_ARRAY_LENGTH, we split it 583 | rich_text_arr = obj["paragraph"]["rich_text"] 584 | rich_texts = [rich_text_arr[i:i+Html2JsonBase.RICHTEXT_ARRAY_LENGTH] 585 | for i in range(0, len(rich_text_arr), Html2JsonBase.RICHTEXT_ARRAY_LENGTH)] 586 | for rich_text in rich_texts: 587 | new_json_obj = { 588 | "object": "block", 589 | "type": "paragraph", 590 | "paragraph": { 591 | "rich_text": rich_text 592 | } 593 | } 594 | final_objs.append(new_json_obj) 595 | return final_objs 596 | 597 | @classmethod 598 | def register(cls, input_type, subclass): 599 | cls._registry[input_type] = subclass 600 | 601 | @classmethod 602 | def create(cls, input_type, html_content, import_stat): 603 | subclass = cls._registry.get(input_type) 604 | if subclass is None: 605 | raise ValueError(f"noknown: {input_type}") 606 | return subclass(html_content, import_stat) 607 | -------------------------------------------------------------------------------- 
# -------------------- html2notion/translate/html2json_clipper.py --------------------
from bs4 import BeautifulSoup, NavigableString, Tag
from ..utils import logger, DateStrToISO8601
from ..translate.html2json_base import Html2JsonBase, Block

YinXiangClipper_Type = "clipper.yinxiang"


class Html2JsonClipper(Html2JsonBase):
    """Converter for notes captured with the Yinxiang/Evernote web clipper."""

    input_type = YinXiangClipper_Type

    def __init__(self, html_content, import_stat):
        super().__init__(html_content, import_stat)

    def process(self):
        # Parse the page, fill page properties from <head>, then convert the
        # body's elements into Notion blocks.
        soup = BeautifulSoup(self.html_content, 'html.parser')
        self.convert_properties(soup)

        content_tags = soup.body
        if not content_tags:
            logger.error("No content found")
            raise Exception("No content found")

        self.import_stat.add_text(content_tags.get_text())
        self.convert_children(content_tags)  # Assume there is only one body tag

        return YinXiangClipper_Type

    def convert_properties(self, soup):
        # Title from <title>; url/tags/created_time from the clipper's <meta>
        # tags, each with an optional converter applied to the raw content.
        properties = {"title": "Unknown"}
        title_tag = soup.select_one('head > title')
        if title_tag:
            properties["title"] = title_tag.text

        meta_tags = [
            ('head > meta[name="source-url"]', "url"),
            ('head > meta[name="keywords"]', "tags", lambda x: x.split(",")),
            ('head > meta[name="created"]', "created_time", DateStrToISO8601),
        ]

        for selector, key, *converter in meta_tags:
            tag = soup.select_one(selector)
            if tag and tag.get('content', None):
                content = tag['content']
                properties[key] = converter[0](content) if converter else content

        self.properties = self.generate_properties(**properties)
        return

    def get_block_type(self, element):
        # Map an HTML element to the Notion block type it should become;
        # FAIL means no converter exists for it.
        tag_name = element.name
        if tag_name == "p":
            return Block.PARAGRAPH.value
        elif tag_name == "table":
            return Block.TABLE.value
        elif tag_name in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            return Block.HEADING.value
        elif tag_name == 'hr':
            return Block.DIVIDER.value
        elif tag_name == 'ol':
            return Block.NUMBERED_LIST.value
        elif tag_name == 'ul':
            return Block.BULLETED_LIST.value
        elif element.name == 'pre' and element.code:
            return Block.CODE.value
        elif self._check_is_block(element):
            return Block.QUOTE.value

        return Block.FAIL.value

    def convert_children(self, soup):
        # First pass: walk every descendant, convert each element whose type
        # has a convert_<type> method, and remember it so its own descendants
        # are not converted twice.
        processed_tags = set()
        for element in soup.descendants:
            if isinstance(element, NavigableString):
                continue
            if any(id(ancestor) in processed_tags for ancestor in element.parents):
                logger.debug(f"Skip processed tag {element}")
                continue
            block_type = self.get_block_type(element)
            if hasattr(self, f"convert_{block_type}"):
                converter = getattr(self, f"convert_{block_type}")
                block = converter(element)
                if block:
                    self.children.extend([block] if not isinstance(block, list) else block)
                processed_tags.add(id(element))
        # Second pass: collect text nodes no converted ancestor covered, so
        # skipped content can be reported and counted.
        unprocessed_tags = set()
        for element in soup.descendants:
            if not isinstance(element, NavigableString) or id(element) in processed_tags:
                continue
            if any(id(ancestor) in processed_tags for ancestor in element.parents):
                continue
            unprocessed_tags.add(element)

        for unprocessed_tag in unprocessed_tags:
            logger.warning(f"Unknown tag {unprocessed_tag.name}, {self.get_block_type(unprocessed_tag)}")
            self.import_stat.add_skip_tag(unprocessed_tag.get_text())
        return

    # <pre><code>...</code></pre>
    def convert_code(self, soup):
        # Build a Notion code block from a <pre> element's <code> child.
        json_obj = {
            "object": "block",
            "type": "code",
            "code": {
                "rich_text": [],
                "language": "plain text",
            },
        }
        rich_text = json_obj["code"]["rich_text"]
        code_tag = soup.code
        if not code_tag:
            logger.error(f'No code tag found in {soup}')
            return
        children_list = list(code_tag.children) if isinstance(code_tag, Tag) else [code_tag]
        for child in children_list:
            # Nested <code> children hold line numbers in clipper output.
            if isinstance(child, Tag) and child.name == "code":
                logger.debug(f'Skip line number')
                continue
            text_obj = self.generate_inline_obj(child)
            if text_obj:
                rich_text.extend(text_obj)
        json_obj["code"]["rich_text"] = self.merge_rich_text(rich_text)
        return json_obj

    def convert_quote(self, soup):
        # Build a Notion quote block from a blockquote-like element.
        json_obj = {
            "object": "block",
            "type": "quote",
            "quote": {
                "rich_text": []
            }
        }
        rich_text = json_obj["quote"]["rich_text"]
        text_obj = self.generate_inline_obj(soup)
        if text_obj:
            rich_text.extend(text_obj)

        # Merge tags that have the same annotations
        return json_obj

    def _check_is_block(self, element):
        # Quote-like elements; the commented-out heuristics for styled <div>s
        # are intentionally disabled.
        quote_elements = {'blockquote', 'q', 'cite'}
        if element.name in quote_elements:
            return True

        if element.name != 'div':
            return False

        # if 'class' in element.attrs:
        #     if any('quote' in class_name.lower() for class_name in element.attrs['class']):
        #         return True

        # if 'style' in element.attrs:
        #     style_attrs = element.attrs['style'].lower()
        #     if 'border:' in style_attrs or 'padding:' in style_attrs:
        #         return True

        return False


Html2JsonBase.register(YinXiangClipper_Type, Html2JsonClipper)
# For notes that are clipped from web pages
# that are not written manually by Evernote and have rich text formatting,
# try to keep the format for conversion

from ..translate.html2json_base import Html2JsonBase

Default_Type = "default"


class Html2JsonDefault(Html2JsonBase):
    """Fallback converter used when no specific HTML flavour is detected."""

    input_type = Default_Type

    def __init__(self, html_content, import_stat):
        super().__init__(html_content, import_stat)

    # TODO: real conversion is not implemented yet; the type tag is returned
    # so callers can at least identify the input flavour.
    def process(self):
        return Default_Type


Html2JsonBase.register(Default_Type, Html2JsonDefault)


import re
from bs4 import BeautifulSoup, Tag
from urllib.parse import unquote
from ..utils import logger, is_valid_url, DateStrToISO8601
from ..translate.html2json_base import Html2JsonBase, Block

YinXiangMarkdown_Type = "markdown.yinxiang"

# Yinxiang markdown
# https://list.yinxiang.com/markdown/eef42447-db3f-48ee-827b-1bb34c03eb83.php


class Html2JsonMarkdown(Html2JsonBase):
    """Converter for Yinxiang notes authored in markdown mode."""

    input_type = YinXiangMarkdown_Type
    # Background image Yinxiang uses to render an *unchecked* checkbox; a
    # checklist <li> whose style lacks it is considered checked.
    undo_image = "url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAYAAAAfSC3RAAAAAXNSR0IArs4c6QAAADdJREFUKBVjvHv37n8GMgALSI+SkhJJWu/du8fARJIOJMWjGpECA505GjjoIYLEB6dVUNojFQAA/1MJUFWet/4AAAAASUVORK5CYII=')"

    def __init__(self, html_content, import_stat):
        super().__init__(html_content, import_stat)
        # Recovered original markdown source (URL-decoded from the <center>
        # tag); used to rebuild code blocks that the HTML rendered as images.
        self.markdown = ""

    def process(self):
        """Parse the note, fill self.properties/self.children, return the type tag."""
        soup = BeautifulSoup(self.html_content, 'html.parser')
        self.convert_properties(soup)

        content_tags = soup.body
        if not content_tags:
            logger.error("No content found")
            raise Exception("No content found")

        # The <center> records the contents of the original markdown file;
        # keep its decoded text for code-block recovery, then drop the tag.
        center_to_delete = content_tags.find('center')
        if isinstance(center_to_delete, Tag):
            md_encode = center_to_delete.get_text()
            self.markdown = unquote(md_encode)
            center_to_delete.decompose()

        # Special handling contains blocks of code,
        # because some chart blocks are converted into images and cannot be processed directly
        self._replace_pre_code(soup)
        self.import_stat.add_text(content_tags.get_text())
        img_tags = content_tags.find_all('img')
        for img in img_tags:
            img_src = img.get('src', '')
            if is_valid_url(img_src):
                self.import_stat.add_image(img_src)

        self.convert_children(content_tags)  # Assume only one body tag

        return YinXiangMarkdown_Type

    def convert_properties(self, soup):
        """Build page properties (title, url, tags, created_time) from <head>."""
        properties = {"title": "Unknown"}
        title_tag = soup.select_one('head > title')
        if title_tag:
            properties["title"] = title_tag.text

        # (selector, property key, optional value transformer)
        meta_tags = [
            ('head > meta[name="source-url"]', "url"),
            ('head > meta[name="keywords"]', "tags", lambda x: x.split(",")),
            ('head > meta[name="created"]', "created_time", DateStrToISO8601),
        ]

        for selector, key, *converter in meta_tags:
            tag = soup.select_one(selector)
            if tag and tag.get('content', None):
                content = tag['content']
                properties[key] = converter[0](content) if converter else content

        self.properties = self.generate_properties(**properties)
        return

    def get_block_type(self, element):
        """Map a top-level tag to a Block enum value (FAIL when unsupported)."""
        tag_name = element.name
        if tag_name == "p":
            return Block.PARAGRAPH.value
        elif tag_name == "table":
            return Block.TABLE.value
        elif tag_name in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            return Block.HEADING.value
        elif tag_name == 'hr':
            return Block.DIVIDER.value
        elif tag_name == 'ol':
            return Block.NUMBERED_LIST.value
        elif tag_name == 'ul':
            # A <ul> whose items all carry a checkbox image is a todo list.
            if self._is_checkbox(element):
                return Block.TO_DO.value
            return Block.BULLETED_LIST.value
        elif element.name == 'pre' and element.code:
            if self._is_math(element):
                return Block.EQUATION.value
            return Block.CODE.value
        elif element.name == "blockquote":
            return Block.QUOTE.value

        return Block.FAIL.value

    def convert_children(self, soup):
        """Translate every top-level child of the content <div> into blocks."""
        div_tag = soup.find('div')
        if not div_tag:
            logger.error(f'No div tag found in {soup}')
            return
        for child in div_tag.children:
            block_type = self.get_block_type(child)
            logger.debug(f'block_type: {block_type}, child: {child}')
            if hasattr(self, f"convert_{block_type}"):
                converter = getattr(self, f"convert_{block_type}")
                block = converter(child)
                if block:
                    self.children.extend([block] if not isinstance(block, list) else block)
            else:
                self.import_stat.add_skip_tag(child.get_text())
                logger.warning(f"Unknown tag : {child}")
        return

    def convert_code(self, soup):
        """Convert a <pre><code> (rewritten by _replace_pre_code) into a code block."""
        json_obj = {
            "object": "block",
            "type": "code",
            "code": {
                "rich_text": [],
                "language": "plain text",
            },
        }
        rich_text = json_obj["code"]["rich_text"]
        code_tag = soup.code
        if not code_tag:
            logger.error(f'No code tag found in {soup}')
            return
        children_list = list(code_tag.children) if isinstance(code_tag, Tag) else [code_tag]
        for child in children_list:
            text_obj = self.generate_inline_obj(child)
            if text_obj:
                rich_text.extend(text_obj)

        # _replace_pre_code stored the fenced language in the style attribute.
        css_dict = Html2JsonBase.get_tag_style(code_tag)
        language = css_dict.get('language', 'plain text')
        json_obj["code"]["language"] = Html2JsonBase.get_valid_language(language)
        json_obj["code"]["rich_text"] = self.merge_rich_text(rich_text)
        return json_obj

    def convert_quote(self, soup):
        """Convert a <blockquote> into a Notion quote block."""
        json_obj = {
            "object": "block",
            "type": "quote",
            "quote": {
                "rich_text": []
            }
        }
        rich_text = json_obj["quote"]["rich_text"]
        text_obj = self.generate_inline_obj(soup)
        if text_obj:
            rich_text.extend(text_obj)
        return json_obj

    def convert_equation(self, soup: Tag):
        """Convert a math code block into a paragraph holding one equation.

        The expression is truncated to Notion's maximum equation length.
        """
        json_obj = {
            "object": "block",
            "type": "paragraph",
            "paragraph": {
                "rich_text": []
            }
        }
        expression = soup.get_text()[:Html2JsonBase.EXPRESSION_MAX_LENGTH]
        equation = json_obj["paragraph"]["rich_text"]
        equation.append({
            "type": "equation",
            "equation": {
                "expression": expression
            }
        })
        return json_obj

    def convert_to_do(self, soup: Tag):
        """Convert a checklist <ul> into a list of to_do blocks, one per <li>."""
        li_tags = soup.find_all('li', recursive=True)
        childs = li_tags if li_tags else [soup]
        to_do_blocks = []
        for child in childs:
            json_obj = {
                "object": "block",
                "type": "to_do",
                "to_do": {
                    "rich_text": [],
                    "checked": False
                }
            }
            text = json_obj["to_do"]["rich_text"]
            text_obj = self.generate_inline_obj(child)
            if text_obj:
                text.extend(text_obj)

            # Items carrying the "undo" background image are unchecked;
            # everything else is considered checked.
            style = child.get('style', '')
            if isinstance(style, str) and Html2JsonMarkdown.undo_image not in style:
                json_obj["to_do"]["checked"] = True
            to_do_blocks.append(json_obj)
        return to_do_blocks
line number
... code content ...has a background-image, which is considered a check box 199 | def _is_checkbox(self, soup): 200 | for li in soup.find_all('li'): 201 | style = li.get('style', '') 202 | if not "background-image: url('data:image/png;" in style: 203 | return False 204 | return True 205 | 206 | def _extract_code_blocks(self): 207 | code_pattern = re.compile(r'```(\w+)?\n(.*?)```', re.DOTALL) 208 | matches = code_pattern.findall(self.markdown) 209 | code_blocks = [{'language': match[0], 'code': match[1].rstrip('\n')} for match in matches] 210 | return code_blocks 211 | 212 | def _replace_pre_code(self, soup): 213 | markdown_code_blocks = self._extract_code_blocks() 214 | count = sum(1 for pre_tag in soup.find_all('pre') if pre_tag.find('code')) 215 | 216 | if markdown_code_blocks and count != len(markdown_code_blocks): 217 | logger.warning(f'Code block count not match: {count} != {len(markdown_code_blocks)}') 218 | return 219 | 220 | pre_tags = soup.find_all('pre') 221 | idx = 0 222 | for pre in pre_tags: 223 | code = pre.find('code') 224 | if not code: 225 | continue 226 | new_tag = soup.new_tag('code') 227 | new_tag.string = markdown_code_blocks[idx]['code'] 228 | new_tag['style'] = 'language: ' + markdown_code_blocks[idx]['language'] 229 | idx += 1 230 | code.replace_with(new_tag) 231 | return soup 232 | 233 | def _is_math(self, soup): 234 | code_tag = soup.code 235 | if not code_tag: 236 | return False 237 | 238 | css_dict = Html2JsonBase.get_tag_style(code_tag) 239 | if 'language' in css_dict and css_dict['language'] == 'math': 240 | return True 241 | return False 242 | 243 | 244 | Html2JsonBase.register(YinXiangMarkdown_Type, Html2JsonMarkdown) 245 | -------------------------------------------------------------------------------- /html2notion/translate/html2json_yinxiang.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup, Tag 2 | from ..utils import logger, DateStrToISO8601 3 | from 
YinXiang_Type = "yinxiang"


class Html2JsonYinXiang(Html2JsonBase):
    """Converter for notes written natively in Yinxiang/Evernote."""

    input_type = YinXiang_Type

    def __init__(self, html_content, import_stat):
        super().__init__(html_content, import_stat)

    def process(self):
        """Parse the note, fill children and properties, return the type tag."""
        soup = BeautifulSoup(self.html_content, 'html.parser')
        self.convert_children(soup)
        self.convert_properties(soup)
        return YinXiang_Type

    def convert_properties(self, soup):
        """Build page properties (title, url, tags, created_time) from <head>."""
        properties = {"title": "Unknown"}
        title_tag = soup.select_one('head > title')
        if title_tag:
            properties["title"] = title_tag.text

        # (property key, selector, optional value transformer)
        meta_specs = (
            ("url", 'head > meta[name="source-url"]', None),
            ("tags", 'head > meta[name="keywords"]', lambda x: x.split(",")),
            ("created_time", 'head > meta[name="created"]', DateStrToISO8601),
        )
        for key, selector, transform in meta_specs:
            tag = soup.select_one(selector)
            content = tag.get('content', None) if tag else None
            if content:
                properties[key] = transform(content) if transform else content

        self.properties = self.generate_properties(**properties)
        return

    def convert_children(self, soup):
        """Translate every top-level child of <body> into Notion blocks."""
        bodies = soup.find_all('body', recursive=True)
        if not bodies:
            logger.warning("No content found")
            raise Exception("No content found")

        body = bodies[0]
        # Record the full text length so import stats can detect loss later.
        self.import_stat.add_text(body.get_text())
        for child in body.children:
            block_type = self.get_block_type(child)
            logger.debug(f'Support tag {child} with style {block_type}')
            converter = getattr(self, f"convert_{block_type}", None)
            if converter is None:
                self.import_stat.add_skip_tag(child.get_text())
                logger.warning(f"Unknown tag : {child}")
                continue
            block = converter(child)
            if block:
                self.children.extend(block if isinstance(block, list) else [block])
61 | def convert_code(self, soup): 62 | json_obj = { 63 | "object": "block", 64 | "type": "code", 65 | "code": { 66 | "rich_text": [], 67 | "language": "plain text", 68 | }, 69 | } 70 | rich_text = json_obj["code"]["rich_text"] 71 | 72 | children_list = list(soup.children) if isinstance(soup, Tag) else [soup] 73 | for index, child in enumerate(children_list): 74 | is_last_child = index == len(children_list) - 1 75 | text_obj = self.generate_inline_obj(child) 76 | if text_obj: 77 | rich_text.extend(text_obj) 78 | if not is_last_child: 79 | rich_text.append(self.generate_text(plain_text='\n', stats_count=False)) 80 | json_obj["code"]["rich_text"] = self.merge_rich_text(rich_text) 81 | css_dict = Html2JsonBase.get_tag_style(soup) 82 | language = css_dict.get('--en-codeblockLanguage', 'plain text') 83 | json_obj["code"]["language"] = language 84 | return json_obj 85 | 86 | def convert_quote(self, soup): 87 | json_obj = { 88 | "object": "block", 89 | "type": "quote", 90 | "quote": { 91 | "rich_text": [] 92 | } 93 | } 94 | rich_text = json_obj["quote"]["rich_text"] 95 | 96 | children_list = list(soup.children) 97 | for index, child in enumerate(children_list): 98 | is_last_child = index == len(children_list) - 1 99 | text_obj = self.generate_inline_obj(child) 100 | if text_obj: 101 | rich_text.extend(text_obj) 102 | if not is_last_child: 103 | rich_text.append(self.generate_text(plain_text='\n', stats_count=False)) 104 | 105 | # Merge tags has same anotions 106 | logger.debug(f'before merge: {rich_text}') 107 | json_obj["quote"]["rich_text"] = self.merge_rich_text(rich_text) 108 | return json_obj 109 | 110 | def convert_to_do(self, soup: Tag): 111 | # Compatible with the situation where input is under li tag(super note). 
112 | li_tags = soup.find_all('li', recursive=True) 113 | childs = li_tags if li_tags else [soup] 114 | to_do_blocks = [] 115 | for child in childs: 116 | json_obj = { 117 | "object": "block", 118 | "type": "to_do", 119 | "to_do": { 120 | "rich_text": [], 121 | "checked": False 122 | } 123 | } 124 | text = json_obj["to_do"]["rich_text"] 125 | text_obj = self.generate_inline_obj(child) 126 | if text_obj: 127 | text.extend(text_obj) 128 | input_tag = child.find('input') 129 | if input_tag and isinstance(input_tag, Tag) and input_tag.get('checked', 'false') == 'true': 130 | json_obj["to_do"]["checked"] = True 131 | to_do_blocks.append(json_obj) 132 | return to_do_blocks 133 | 134 | def get_block_type(self, single_tag): 135 | tag_name = single_tag.name 136 | style = single_tag.get('style') if tag_name else "" 137 | 138 | # There are priorities here. It is possible to hit multiple targets 139 | # at the same time, and the first one takes precedence. 140 | if self._check_is_todo(single_tag): 141 | return Block.TO_DO.value 142 | elif tag_name == 'hr': 143 | return Block.DIVIDER.value 144 | elif tag_name == 'ol': 145 | return Block.NUMBERED_LIST.value 146 | elif tag_name == 'ul': 147 | return Block.BULLETED_LIST.value 148 | elif tag_name == 'p': 149 | return Block.PARAGRAPH.value 150 | elif tag_name in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'): 151 | return Block.HEADING.value 152 | elif tag_name == 'table' or self._check_is_table(single_tag): 153 | return Block.TABLE.value 154 | 155 | css_dict = Html2JsonBase.get_tag_style(single_tag) 156 | if css_dict.get('--en-blockquote', None) == 'true': 157 | return Block.QUOTE.value 158 | if css_dict.get('--en-codeblock', None) == 'true': 159 | return Block.CODE.value 160 | if css_dict.get('-en-codeblock', None) == 'true': 161 | return Block.CODE.value 162 | 163 | # Issue 5: 164 | if tag_name == 'div': 165 | return Block.PARAGRAPH.value 166 | return Block.FAIL.value 167 | 168 | #169 | def _check_is_table(self, tag): 170 | if tag.name == 
"div": 171 | children = list(filter(lambda x: x != '\n', tag.contents)) 172 | table_count = sum(1 for child in children if child.name == "table") 173 | return table_count >= 1 174 | return False 175 | 176 | def _check_is_todo(self, tag): 177 | if not isinstance(tag, Tag): 178 | return False 179 | input_tag = tag.find('input') 180 | if input_tag and isinstance(input_tag, Tag) and input_tag.get('type') == 'checkbox': 181 | return True 182 | return False 183 | 184 | Html2JsonBase.register(YinXiang_Type, Html2JsonYinXiang) 185 | -------------------------------------------------------------------------------- /html2notion/translate/import_stats.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from enum import Enum 3 | 4 | 5 | class StatLevel(Enum): 6 | EXCEPTION = "exception" 7 | LOSS = "loss" 8 | SUCC = "success" 9 | 10 | 11 | class ImportStats: 12 | def __init__(self): 13 | self.text_count = 0 14 | self.image_count = 0 15 | self.notion_text_count = 0 16 | self.notion_image_count = 0 17 | self.skip_tag = [] 18 | self.exception = None 19 | self.filename = "None" 20 | self.html_content = "" 21 | self.notion_content = "" 22 | self.html_image_src = [] 23 | self.notion_image_src = [] 24 | self.head_meta = {} 25 | 26 | def add_text(self, text: str): 27 | self.text_count += len(text) 28 | self.html_content += text 29 | 30 | def add_notion_text(self, text: str): 31 | self.notion_content += text 32 | self.notion_text_count += len(text) 33 | 34 | def add_image(self, src: str): 35 | self.html_image_src.append(src) 36 | self.image_count += 1 37 | 38 | def add_notion_image(self, src: str): 39 | self.notion_image_src.append(src) 40 | self.notion_image_count += 1 41 | 42 | def add_skip_tag(self, tag): 43 | self.skip_tag.append(tag) 44 | 45 | def set_filename(self, filename: Path): 46 | self.filename = filename 47 | 48 | def set_exception(self, exception: Exception): 49 | self.exception = exception 50 | 51 | def 
get_level(self): 52 | if self.exception: 53 | return StatLevel.EXCEPTION.value 54 | if self.notion_text_count < self.text_count: 55 | return StatLevel.LOSS.value 56 | return StatLevel.SUCC.value 57 | 58 | def __str__(self): 59 | msg = "" 60 | if self.get_level() == StatLevel.EXCEPTION.value: 61 | msg += f"[red]{str(self.exception)}[/red]" 62 | if 'body.parent.page_id should be defined' in str(self.exception): 63 | msg += f"\nHeadmeta: [yellow]{self.head_meta}[/yellow]" 64 | 65 | if self.get_level() == StatLevel.LOSS.value: 66 | if self.text_count != self.notion_text_count: 67 | msg += f"Text Len {self.text_count} -> {self.notion_text_count}, Loss [yellow]{self.text_count-self.notion_text_count}[/yellow]" 68 | 69 | msg += '\nDetail: [yellow]' + ";".join([repr(s) for s in self.skip_tag])[:500] + "[/yellow]" 70 | return msg 71 | 72 | def get_detail(self): 73 | return f"filename: {self.filename}, {self.text_count} text, {self.image_count} image\nNotion {self.notion_text_count} text, {self.notion_image_count} image\n{self.skip_tag}" 74 | 75 | 76 | if __name__ == '__main__': 77 | task_stats = ImportStats() 78 | task_stats.add_text(100) 79 | task_stats.add_image(20) 80 | task_stats.add_notion_text(80) 81 | task_stats.add_notion_image(15) 82 | task_stats.set_exception(Exception("Some error occurred")) 83 | 84 | print(task_stats) 85 | -------------------------------------------------------------------------------- /html2notion/translate/notion_export.py: -------------------------------------------------------------------------------- 1 | import json 2 | from notion_client import Client, errors as notion_errors 3 | from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type 4 | from ..utils import logger, test_prepare_conf, config 5 | 6 | class NotionExporter: 7 | # Remove keys which not used by add page 8 | delete_block = { 9 | "rich_text": [ 10 | { 11 | # "type": "text", 12 | "text": { 13 | # "content": "测试第一行", 14 | "link": None 15 | }, 16 | 
"annotations": { 17 | "bold": False, 18 | "italic": False, 19 | "strikethrough": False, 20 | "underline": False, 21 | "code": False, 22 | "color": "default" 23 | }, 24 | # "plain_text": "测试第一行", 25 | "href": None 26 | } 27 | ], 28 | "color": "default", 29 | "is_toggleable": False 30 | } 31 | 32 | delete_conf = { 33 | # "object": "block", 34 | "id": "__any__", 35 | "parent": "__any__", 36 | "created_time": "__any__", 37 | "last_edited_time": "__any__", 38 | "created_by": "__any__", 39 | "last_edited_by": "__any__", 40 | "has_children": False, 41 | "archived": False, 42 | # "type": "paragraph", 43 | "paragraph": delete_block, 44 | "quote": delete_block, 45 | "numbered_list_item": delete_block, 46 | "bulleted_list_item": delete_block, 47 | "heading_1": delete_block, 48 | "heading_2": delete_block, 49 | "heading_3": delete_block, 50 | } 51 | 52 | def __init__(self, api_key, page_id, page_size=2): 53 | self.notion = Client(auth=api_key, logger=logger) 54 | self.page_id = page_id 55 | self.page_size = page_size 56 | self.all_blocks = [] 57 | self.output_blocks = [] 58 | 59 | @staticmethod 60 | def get_delete_conf(key_path): 61 | result = NotionExporter.delete_conf.copy() 62 | for key in key_path: 63 | # Number in path is json array placeholder 64 | if isinstance(key, int): 65 | if isinstance(result, list) and len(result) > 0: 66 | result = result[0] # type: ignore 67 | else: 68 | result = None 69 | elif isinstance(result, dict) and key in result: 70 | # If prefix path has __any__ conf, then delete all children 71 | if result[key] == "__any__": 72 | return ["__any__"] 73 | else: 74 | result = result[key] 75 | else: 76 | result = None 77 | 78 | if (isinstance(result, list)): 79 | return result 80 | elif (isinstance(result, str) or isinstance(result, bool) or isinstance(result, int)): 81 | return [result] 82 | else: 83 | return [None] 84 | 85 | @staticmethod 86 | def check_is_delete(key_path: list, value): 87 | delete_values = NotionExporter.get_delete_conf(key_path) 88 | 
if value in delete_values or '__any__' in delete_values: 89 | return True 90 | # logger.debug(f"Check key: {key_path}, value: {value}, delete values: {delete_values}") 91 | return False 92 | 93 | @staticmethod 94 | def keep_dict_pathvalue(data, path, value): 95 | for i, key in enumerate(path): 96 | if isinstance(key, int): 97 | data = data[key] 98 | elif i == len(path) - 1: 99 | data[key] = value 100 | else: 101 | next_key = path[i+1] if i+1 < len(path) else None 102 | if key in data: 103 | if isinstance(next_key, int): 104 | if not isinstance(data[key], list): 105 | logger.error(f"Keep error: {i}, {path}, {data[key]}") 106 | return 107 | data[key].extend([{} for _ in range(next_key - len(data[key]) + 1)]) 108 | else: 109 | if not isinstance(data[key], dict): 110 | logger.error(f"Keep error: {i}, {path}, {data[key]}") 111 | return 112 | else: 113 | if isinstance(next_key, int): 114 | data[key] = [{} for _ in range(next_key + 1)] 115 | else: 116 | data[key] = {} 117 | 118 | data = data[key] 119 | return 120 | 121 | @retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=3, max=30), retry=retry_if_exception_type(notion_errors.RequestTimeoutError)) 122 | def __get_children_blocks(self): 123 | children = self.notion.blocks.children.list(block_id=self.page_id, page_size=self.page_size) 124 | if not isinstance(children, dict): 125 | logger.error(f"Get children failed: {children}") 126 | return None 127 | 128 | loop_count = 1 129 | while isinstance(children, dict) and "has_more" in children and children["has_more"]: 130 | next_cursor = children["next_cursor"] 131 | self.all_blocks.extend(children["results"]) 132 | children = self.notion.blocks.children.list( 133 | block_id=self.page_id, page_size=self.page_size, start_cursor=next_cursor) 134 | loop_count += 1 135 | cur_content = json.dumps(children, indent=4, ensure_ascii=False) 136 | logger.debug(f'Get child, {loop_count}: {cur_content}') 137 | 138 | if isinstance(children, dict) and "has_more" in 
children and not children["has_more"]: 139 | self.all_blocks.extend(children["results"]) 140 | return children 141 | 142 | @staticmethod 143 | def export_dict(data): 144 | clean_block = {} 145 | stack = [(data, list())] 146 | while stack: 147 | cur, prefix = stack.pop() 148 | if isinstance(cur, dict): 149 | for k, v in cur.items(): 150 | prefix.append(k) 151 | # logger.debug(f"Export dict, prefix: {prefix}, value: {v}") 152 | stack.append((v, prefix[:])) 153 | prefix.pop() 154 | elif isinstance(cur, list): 155 | for i, v in enumerate(cur): 156 | # logger.debug(f"Export array, prefix: {prefix}, {i}, value: {v}") 157 | prefix.append(i) 158 | stack.append((v, prefix[:])) 159 | prefix.pop() 160 | else: 161 | if (not NotionExporter.check_is_delete(prefix[:], cur)): 162 | logger.debug(f"Keep {prefix}: {cur}") 163 | NotionExporter.keep_dict_pathvalue(clean_block, prefix, cur) 164 | return clean_block 165 | 166 | def export_blocks(self): 167 | self.__get_children_blocks() 168 | result = json.dumps(self.all_blocks, indent=4, ensure_ascii=False) 169 | logger.info(f"Before process, blocks {result}") 170 | 171 | if not self.all_blocks: 172 | logger.error("Get children empty") 173 | 174 | for block in self.all_blocks: 175 | output_block = self.export_dict(block) 176 | self.output_blocks.append(output_block) 177 | 178 | return self.output_blocks 179 | 180 | 181 | if __name__ == "__main__": 182 | test_prepare_conf() 183 | exporter = NotionExporter(api_key=config['notion']['api_key'], 184 | page_id=config['notion']['page_id'], 185 | page_size=10) 186 | exporter.export_blocks() 187 | print(json.dumps(exporter.output_blocks, indent=4, ensure_ascii=False)) 188 | -------------------------------------------------------------------------------- /html2notion/translate/notion_import.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import traceback 4 | from aiohttp import ClientSession 5 | from pathlib import Path 6 | 
from notion_client import AsyncClient
from notion_client.errors import RequestTimeoutError
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from ..utils import logger, test_prepare_conf, config, rate_limit
from ..translate.html2json import html2json_process
from ..translate.import_stats import ImportStats


class NotionImporter:
    """Imports one HTML note into Notion, recording stats along the way."""

    def __init__(self, session: ClientSession, notion_client):
        self.session = session
        self.notion_client = notion_client
        self.import_stats = ImportStats()

    async def process_file(self, file_path: Path):
        """Convert *file_path* and create the corresponding Notion page.

        Returns "succ" on success, "fail" when conversion or page creation
        raised; the exception is kept in import_stats for reporting.
        """
        self.import_stats.set_filename(file_path)
        try:
            notion_data, html_type = html2json_process(file_path, self.import_stats)
        except Exception as exc:
            self.import_stats.set_exception(exc)
            logger.error(f"Error processing {file_path}: {str(exc)}, {traceback.format_exc()}")
            return "fail"

        logger.info(f"Process path: {file_path}, html type: {html_type}, {self.import_stats.get_detail()}")
        try:
            create_result = await self.create_new_page(notion_data)
        except Exception as exc:
            self.import_stats.set_exception(exc)
            logger.error(f"Error create notion page {file_path}: {str(exc)}, {traceback.format_exc()}")
            return "fail"
        logger.info(f"Create notion page: {create_result}")
        return "succ"

    # https://developers.notion.com/reference/request-limits
    # The rate limit for incoming requests per integration is an average of three requests per second.
43 | # Doc of create page: https://developers.notion.com/reference/post-page 44 | @retry(stop=stop_after_attempt(5), 45 | wait=wait_exponential(multiplier=1, min=3, max=30), 46 | retry=retry_if_exception_type(RequestTimeoutError)) 47 | async def create_new_page(self, notion_data): 48 | # logger.debug(f'Create new page: {notion_data["parent"]}, {notion_data["properties"]}') 49 | # body.children.length should be ≤ `100`, 50 | blocks = notion_data.get("children", []) 51 | # logger.debug(f'Create new page: {notion_data["parent"]}, {notion_data["properties"]}, blocks: {blocks}') 52 | 53 | limit_size = 100 54 | chunks = [blocks[i: i + limit_size] for i in range(0, len(blocks), limit_size)] 55 | if blocks: 56 | notion_data.pop("children") 57 | first_chunk = chunks[0] if chunks else [] 58 | async with rate_limit: 59 | created_page = await self.notion_client.pages.create(**notion_data, children=first_chunk) 60 | page_id = created_page["id"] 61 | for chunk in chunks[1:]: 62 | await self.notion_client.blocks.children.append(page_id, children=chunk) 63 | return created_page 64 | 65 | 66 | async def main(file_path, notion_api_key): 67 | async with ClientSession() as session: 68 | async with AsyncClient(auth=notion_api_key) as notion_client: 69 | importer = NotionImporter(session, notion_client) 70 | result = await importer.process_file(file_path) 71 | logger.info(f"Import result: {result}") 72 | 73 | 74 | if __name__ == "__main__": 75 | test_prepare_conf() 76 | file = Path("./demos/Test Case E.html") 77 | notion_api_key = "" 78 | if 'GITHUB_ACTIONS' in os.environ: 79 | notion_api_key = os.environ['notion_api_key'] 80 | else: 81 | notion_api_key = config['notion']['api_key'] 82 | asyncio.run(main(file, notion_api_key)) 83 | -------------------------------------------------------------------------------- /html2notion/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .log import logger, setup_logger 2 | from .load_config 
import read_config, config, rate_limit 3 | from .url_process import is_valid_url 4 | from .timeutil import DateStrToISO8601 5 | from pathlib import Path 6 | 7 | 8 | def test_prepare_conf(): 9 | log_path = Path("./logs") 10 | conf_path = Path("./.config.json") 11 | setup_logger(log_path) 12 | read_config(conf_path) 13 | logger.info(f"test_prepare_conf, log path({log_path}), conf path({conf_path})") 14 | 15 | 16 | __all__ = ['logger', 'setup_logger', 'config', 'read_config', 'test_prepare_conf', 'rate_limit', 'is_valid_url', 'DateStrToISO8601'] 17 | -------------------------------------------------------------------------------- /html2notion/utils/load_config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | from aiolimiter import AsyncLimiter 4 | rate_limit = AsyncLimiter(3, 1) 5 | config = {} 6 | 7 | 8 | def read_config(file_path): 9 | """ 10 | { 11 | "notion": { 12 | "database_id": "xxxxx", 13 | "api_key": "xxxxx" 14 | } 15 | } 16 | """ 17 | if not file_path.is_file(): 18 | print(f"Load {file_path} fail") 19 | sys.exit(1) 20 | 21 | with open(file_path, "r") as f: 22 | json_conf = json.load(f) 23 | 24 | config.update(json_conf) 25 | if "notion" not in config: 26 | raise Exception("notion is not set in config.json") 27 | 28 | notion_conf = config["notion"] 29 | if "database_id" not in notion_conf: 30 | raise Exception("database_id is not set in config.json") 31 | if "api_key" not in notion_conf: 32 | raise Exception("api_key is not set in config.json") 33 | return 34 | -------------------------------------------------------------------------------- /html2notion/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from logging import handlers 4 | logger = logging.getLogger() 5 | 6 | 7 | class CustomFormatter(logging.Formatter): 8 | green = "\033[92m" 9 | normal = "\x1b[38;21m" 10 | yellow = "\x1b[33;21m" 11 | red = 
"\x1b[31;21m" 12 | bold_red = "\x1b[31;1m" 13 | reset = "\x1b[0m" 14 | format = "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s" # type: ignore 15 | 16 | FORMATS = { 17 | logging.DEBUG: green + format + reset, # type: ignore 18 | logging.INFO: normal + format + reset, # type: ignore 19 | logging.WARNING: yellow + format + reset, # type: ignore 20 | logging.ERROR: red + format + reset, # type: ignore 21 | logging.CRITICAL: bold_red + format + reset # type: ignore 22 | } 23 | 24 | def format(self, record): 25 | log_fmt = self.FORMATS.get(record.levelno) 26 | formatter = logging.Formatter(log_fmt) 27 | return formatter.format(record) 28 | 29 | 30 | def setup_logger(log_path): 31 | file_path = log_path.joinpath("html2notion_error.log") 32 | handler = handlers.TimedRotatingFileHandler( 33 | filename=file_path, when='midnight', backupCount=30, encoding='utf-8') 34 | handler.setLevel(logging.DEBUG) 35 | handler.setFormatter(CustomFormatter()) 36 | logger.addHandler(handler) 37 | logger.setLevel(logging.DEBUG) 38 | 39 | logger.debug('Logging debug message') 40 | logger.info('Logging info message') 41 | logger.warning('Logging warning message') 42 | logger.error('Logging error message') 43 | 44 | 45 | def log_only_local(content): 46 | if 'GITHUB_ACTIONS' in os.environ: 47 | return 48 | 49 | from html2notion.utils import logger 50 | logger.info(content) 51 | -------------------------------------------------------------------------------- /html2notion/utils/timeutil.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from . import logger 3 | 4 | 5 | def DateStrToISO8601(date_string: str) -> str: 6 | """Converts a date string to ISO 8601 format. 7 | 8 | Args: 9 | date_string (str): Date string to convert. 10 | 11 | Returns: 12 | str: ISO 8601 formatted date string. 
13 | """ 14 | 15 | date_format = "%Y-%m-%d %H:%M:%S %z" 16 | try: 17 | date_obj = datetime.strptime(date_string, date_format).astimezone() 18 | except ValueError: 19 | logger.warning(f"Invalid date string: {date_string}") 20 | return "" 21 | 22 | output_string = date_obj.isoformat() 23 | return output_string 24 | -------------------------------------------------------------------------------- /html2notion/utils/url_process.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import urlparse 2 | 3 | 4 | def is_valid_url(url): 5 | if not isinstance(url, str): 6 | return False 7 | try: 8 | result = urlparse(url) 9 | return all([result.scheme, result.netloc]) and is_valid_port(result.port) 10 | except ValueError: 11 | return False 12 | 13 | 14 | def is_valid_port(port): 15 | if port is None: 16 | return True 17 | return 0 <= port <= 65535 18 | 19 | 20 | if __name__ == '__main__': 21 | print(is_valid_url("https://www.google.com")) # Returns: True 22 | print(is_valid_url("google")) # Returns: False 23 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=54", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4>=4.11.2 2 | httpcore>=0.16.3 3 | httpx>=0.23.3 4 | notion-client>=2.0.0 5 | PyYAML>=6.0 6 | aiohttp>=3.8.4 7 | anyio>=3.6.2 8 | cos-python-sdk-v5>=1.9.23 9 | tenacity>=8.2.2 10 | rich>=13.3.4 11 | aiolimiter>=1.0.0 12 | chardet>=5.1.0 13 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | 
name = html2notion 3 | version = 0.2.0 4 | author = selfboot 5 | author_email = xuezaigds@gmail.com 6 | description = This tool can accurately convert HTML to Notion notes and is also useful for exporting Evernote notes to Notion. 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/selfboot/html2notion 10 | license_files = LICENSE 11 | classifiers = 12 | Programming Language :: Python :: 3 13 | License :: OSI Approved :: MIT License 14 | Operating System :: OS Independent 15 | 16 | [options] 17 | packages = find: 18 | install_requires = 19 | beautifulsoup4>=4.11.2 20 | httpcore>=0.16.3 21 | httpx>=0.23.3 22 | notion-client>=2.0.0 23 | PyYAML>=6.0 24 | aiohttp>=3.8.4 25 | anyio>=3.6.2 26 | cos-python-sdk-v5>=1.9.23 27 | tenacity>=8.2.2 28 | rich>=13.3.4 29 | aiolimiter>=1.0.0 30 | chardet>=5.1.0 31 | 32 | [options.entry_points] 33 | console_scripts = 34 | html2notion = html2notion.main:main 35 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() 4 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | from html2notion.utils import test_prepare_conf, logger 4 | 5 | 6 | @pytest.fixture(autouse=True, scope='module') 7 | def prepare_conf(): 8 | if 'GITHUB_ACTIONS' not in os.environ: 9 | test_prepare_conf() 10 | logger.info("prepare_conf_fixture") 11 | -------------------------------------------------------------------------------- /tests/test_batchimport.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import pytest 3 | import time 4 | import os 5 | from pathlib import Path 6 | from unittest.mock import patch 7 | from 
tempfile import TemporaryDirectory 8 | from http import HTTPStatus 9 | from html2notion.translate.batch_import import BatchImport 10 | from html2notion.utils import rate_limit 11 | from html2notion.utils.log import log_only_local 12 | 13 | process_once_time = 0.5 14 | 15 | 16 | async def mock_notion_api_request(file_path, *args, **kwargs): 17 | class MockResponse: 18 | def __init__(self, status_code, file_content, elapsed_time): 19 | self.status_code = status_code 20 | self.file_content = file_content 21 | self.elapsed_time = elapsed_time 22 | 23 | def json(self): 24 | return {"result": "success", "file_content": self.file_content, "elapsed_time": self.elapsed_time} 25 | 26 | start_time = time.perf_counter() 27 | content = file_path.read_text() 28 | if 'GITHUB_ACTIONS' not in os.environ: 29 | from html2notion.utils import logger 30 | logger.debug(f"mock_notion_api_request: {file_path}") 31 | await asyncio.sleep(process_once_time) 32 | end_time = time.perf_counter() 33 | elapsed_time = end_time - start_time 34 | return MockResponse(HTTPStatus.OK, content, elapsed_time) 35 | 36 | 37 | async def mock_notion_create_page(notion_data, *args, **kwargs): 38 | async with rate_limit: 39 | await asyncio.sleep(0.01) 40 | log_only_local(f"mock_notion_create_page") 41 | return "succ" 42 | 43 | @pytest.fixture(params=[10, 20]) 44 | def temp_dir_fixture(request): 45 | num_files = request.param 46 | with TemporaryDirectory() as temp_dir: 47 | dir_path = Path(temp_dir) 48 | temp_files = [] 49 | for i in range(num_files): 50 | temp_file = dir_path / f"file{i}.html" 51 | temp_file.write_text(f"file{i}") 52 | temp_files.append(temp_file) 53 | 54 | yield dir_path 55 | 56 | 57 | @pytest.mark.parametrize("concurrent_limit", [2, 5, 10]) 58 | @pytest.mark.asyncio 59 | async def test_batch_process(temp_dir_fixture, concurrent_limit): 60 | dir_path = temp_dir_fixture 61 | start_time = time.perf_counter() 62 | with patch("html2notion.translate.notion_import.NotionImporter.process_file", 
side_effect=mock_notion_api_request): 63 | batch_processor = BatchImport( 64 | dir_path, concurrent_limit=concurrent_limit) 65 | responses = await batch_processor.process_directory() 66 | 67 | end_time = time.perf_counter() 68 | for file_path, response in zip( 69 | sorted(dir_path.iterdir()), 70 | sorted(responses, key=lambda x: x.json()["file_content"])): 71 | assert response.json()["file_content"] == f"{file_path.stem}" 72 | 73 | total_time = end_time-start_time 74 | sync_time = sum(res.json()["elapsed_time"] for res in responses) 75 | least_time = min(res.json()["elapsed_time"] for res in responses) 76 | log_only_local( 77 | f"total_time: {total_time}, sync_time: {sync_time}, least_time: {least_time}") 78 | assert total_time >= least_time 79 | assert total_time <= sync_time 80 | 81 | 82 | @pytest.mark.parametrize("concurrent_limit", [5, 10, 20]) 83 | @pytest.mark.asyncio 84 | async def test_reqlimit(temp_dir_fixture, concurrent_limit): 85 | dir_path = temp_dir_fixture 86 | start_time = time.perf_counter() 87 | with patch("html2notion.translate.notion_import.NotionImporter.create_new_page", side_effect=mock_notion_create_page): 88 | batch_processor = BatchImport(dir_path, concurrent_limit=concurrent_limit) 89 | responses = await batch_processor.process_directory() 90 | 91 | end_time = time.perf_counter() 92 | total_time = end_time-start_time 93 | num_files = len(list(dir_path.glob('*.html'))) 94 | log_only_local(f"file nums: {num_files}, concurrent {concurrent_limit}, total_time: {total_time}") 95 | # The time deviation within 1 second is acceptable here. 
96 | assert (total_time >= num_files / 3 - 1) -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pathlib import Path 3 | from unittest.mock import mock_open, patch 4 | from html2notion.utils import read_config, config 5 | import pytest 6 | 7 | 8 | def test_read_config(): 9 | mock_file_content = """{ 10 | "notion": { 11 | "database_id": "test_db_id", 12 | "api_key": "test_api_key" 13 | }, 14 | "log_path": "/test/log/path" 15 | } 16 | """ 17 | with patch("builtins.open", mock_open(read_data=mock_file_content)): 18 | with patch.object(Path, "is_file", return_value=True): 19 | read_config(Path("test_config.json")) 20 | assert "notion" in config 21 | assert "database_id" in config["notion"] 22 | assert "api_key" in config["notion"] 23 | assert config["notion"]["database_id"] == "test_db_id" 24 | assert config["notion"]["api_key"] == "test_api_key" 25 | config.clear() 26 | 27 | # Testing for missing database_id, notion, or api_key configurations throws an exception 28 | with patch("builtins.open", mock_open(read_data="{}")), patch.object(Path, "is_file", return_value=True), pytest.raises(Exception, match="notion is not set in config.json"): 29 | read_config(Path("test_config.json")) 30 | config.clear() 31 | 32 | with patch("builtins.open", mock_open(read_data="{\"notion\": {}}")), patch.object(Path, "is_file", return_value=True), pytest.raises(Exception, match="database_id is not set in config.json"): 33 | read_config(Path("test_config.json")) 34 | config.clear() 35 | 36 | with patch("builtins.open", mock_open(read_data="{\"notion\": {\"database_id\": \"test_db_id\"}}")), patch.object(Path, "is_file", return_value=True), pytest.raises(Exception, match="api_key is not set in config.json"): 37 | read_config(Path("test_config.json")) 38 | config.clear() 39 | 
-------------------------------------------------------------------------------- /tests/test_cosupload.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import pytest 3 | import time 4 | import os 5 | import random 6 | import string 7 | from pathlib import Path 8 | from unittest.mock import patch 9 | from tempfile import TemporaryDirectory 10 | from html2notion.translate.batch_import import BatchImport 11 | from html2notion.translate.cos_uploader import TencentCosUploaderAsync 12 | from html2notion.utils.log import log_only_local 13 | 14 | 15 | async def mock_cos_upload_request(file_path, *args, **kwargs): 16 | if 'GITHUB_ACTIONS' not in os.environ: 17 | from html2notion.utils import config 18 | secret_id = config["cos"]["secret_id"] 19 | secret_key = config["cos"]["secret_key"] 20 | region = config["cos"]["region"] 21 | bucket = config["cos"]["bucket"] 22 | else: 23 | secret_id = os.environ['cos_secret_id'] 24 | secret_key = os.environ['cos_secret_key'] 25 | region = os.environ['cos_region'] 26 | bucket = os.environ['cos_bucket'] 27 | 28 | start_time = time.perf_counter() 29 | uploader = TencentCosUploaderAsync(secret_id, secret_key, region, bucket) 30 | loop = asyncio.get_event_loop() 31 | key = f"test_workflow/{file_path.name}" 32 | upload_response = await uploader.upload_file(loop, file_path, key) 33 | log_only_local(f"Upload response: {upload_response}") 34 | 35 | is_exist = await uploader.check_file_exist(loop, key) 36 | end_time = time.perf_counter() 37 | elapsed_time = end_time - start_time 38 | log_only_local(f"Upload elapsed time: {elapsed_time}") 39 | return (is_exist, elapsed_time) 40 | 41 | 42 | @pytest.fixture() 43 | def temp_dir_fixture(): 44 | with TemporaryDirectory() as temp_dir: 45 | dir_path = Path(temp_dir) 46 | temp_files = [] 47 | for i in range(20): 48 | file_size = random.randint(1 * 1024, 1 * 1024 * 1024) 49 | random_text = "".join(random.choices(string.ascii_letters + 
string.digits, k=file_size)) 50 | 51 | temp_file = dir_path / f"file_{i}.html" 52 | temp_file.write_text(random_text) 53 | temp_files.append(temp_file) 54 | 55 | yield dir_path 56 | 57 | 58 | @pytest.mark.asyncio 59 | async def test_batch_cos_upload(temp_dir_fixture): 60 | concurrent_limit = 5 61 | dir_path = temp_dir_fixture 62 | 63 | start_time = time.perf_counter() 64 | with patch("html2notion.translate.notion_import.NotionImporter.process_file", side_effect=mock_cos_upload_request): 65 | batch_processor = BatchImport( 66 | dir_path, concurrent_limit=concurrent_limit) 67 | responses = await batch_processor.process_directory() 68 | end_time = time.perf_counter() 69 | 70 | for res in responses: 71 | assert (res[0]) 72 | 73 | total_time = end_time - start_time 74 | elapsed_times = sum([res[1] for res in responses]) 75 | least_tiems = min([res[1] for res in responses]) 76 | log_only_local(f"Time: sum: {elapsed_times}, min {least_tiems}, total: {total_time}") 77 | assert (total_time < elapsed_times) 78 | assert (total_time >= least_tiems) 79 | -------------------------------------------------------------------------------- /tests/test_demos.py: -------------------------------------------------------------------------------- 1 | # import glob 2 | import json 3 | import os 4 | from pathlib import Path 5 | from html2notion.translate.html2json import html2json_process 6 | from html2notion.translate.import_stats import ImportStats 7 | from html2notion.translate.html2json_markdown import YinXiangMarkdown_Type 8 | from html2notion.translate.html2json_clipper import YinXiangClipper_Type 9 | from html2notion.utils import logger, config 10 | 11 | 12 | def test_demo_files(): 13 | if 'GITHUB_ACTIONS' in os.environ: 14 | database_id = os.environ['notion_db_id_1'] 15 | else: 16 | database_id = config['notion']['database_id'] 17 | 18 | testcases = [ 19 | ["./demos/yinxiang_markdown.html", YinXiangMarkdown_Type, "./demos/yinxiang_markdown.json"], 20 | 
["./demos/yinxiang_clipper.html", YinXiangClipper_Type, "./demos/yinxiang_clipper.json"], 21 | ["./demos/yinxiang_clipper_wx.html", YinXiangClipper_Type, "./demos/yinxiang_clipper_wx.json"], 22 | ] 23 | 24 | for md_file, expect_type, expect_file in testcases: 25 | import_stats = ImportStats() 26 | notion_data, html_type = html2json_process(Path(md_file), import_stats) 27 | 28 | assert html_type == expect_type 29 | with open(expect_file, "r") as f: 30 | content = f.read() 31 | 32 | # Replace the placeholder 33 | content = content.replace("###database_id###", database_id) 34 | expect = json.loads(content) 35 | 36 | # The timezone causes the calculated time to be different, and the check here can be ignored 37 | try: 38 | del expect['properties']['Created']['date']['start'] 39 | del notion_data['properties']['Created']['date']['start'] 40 | except KeyError as e: 41 | pass 42 | 43 | # import dictdiffer 44 | # diff = dictdiffer.diff(notion_data, expect) 45 | # for d in diff: 46 | # logger.debug(f'Diff: {d}') 47 | # aa = json.dumps(notion_data, ensure_ascii=False) 48 | # logger.debug(f'notion_data: {aa}') 49 | assert notion_data ==expect 50 | 51 | -------------------------------------------------------------------------------- /tests/test_log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from unittest.mock import patch, MagicMock 3 | from pathlib import Path 4 | from html2notion.utils import setup_logger, logger 5 | from html2notion.utils.log import CustomFormatter 6 | 7 | 8 | class MockHandler(MagicMock): 9 | @property 10 | def level(self): 11 | return 0 12 | 13 | 14 | @patch('logging.handlers.TimedRotatingFileHandler', new_callable=MockHandler) 15 | def test_setup_logger(mock_handler): 16 | setup_logger(Path('/fake/path')) 17 | 18 | # Assert TimedRotatingFileHandler is called with the correct arguments 19 | mock_handler.assert_called_once_with( 20 | filename=Path('/fake/path', 'html2notion_error.log'), 21 | 
when='midnight', backupCount=30, encoding='utf-8' 22 | ) 23 | 24 | # Assert the mock handler instance is set with the correct level and formatter 25 | mock_handler.return_value.setLevel.assert_called_once_with(logging.DEBUG) 26 | assert isinstance(mock_handler.return_value.setFormatter.call_args[0][0], CustomFormatter) 27 | 28 | # Assert logger has the correct level 29 | assert logger.level == logging.DEBUG 30 | 31 | 32 | def test_custom_formatter(): 33 | formatter = CustomFormatter() 34 | 35 | for level, color in [(logging.DEBUG, "\033[92m"), (logging.INFO, "\x1b[38;21m"), 36 | (logging.WARNING, "\x1b[33;21m"), (logging.ERROR, "\x1b[31;21m"), 37 | (logging.CRITICAL, "\x1b[31;1m")]: 38 | record = logging.LogRecord( 39 | name="test", level=level, pathname='test_path', lineno=0, 40 | msg="test message", args=None, exc_info=None 41 | ) 42 | record.filename = "test.py" 43 | record.lineno = 1 44 | 45 | result = formatter.format(record) 46 | expected_format = f"{color}%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s\x1b[0m" 47 | expected_message = logging.Formatter(expected_format).format(record) 48 | 49 | assert result == expected_message 50 | -------------------------------------------------------------------------------- /tests/test_notionexport.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from html2notion.translate.notion_export import NotionExporter 4 | from html2notion.utils import config 5 | 6 | 7 | def test_check_is_delete(): 8 | del_keyvalue = [ 9 | (["id"], "95948188-43cb-451f-b538-e0375368ca96"), 10 | (["parent", "type"], "page_id"), 11 | (["created_by", "object"], "user"), 12 | (["paragraph", "rich_text", 0, "text", "link"], None), 13 | (["paragraph", "rich_text", 0, "annotations", "code"], False), 14 | (["paragraph", "rich_text", 0, "annotations", "color"], "default"), 15 | (["quote", "rich_text", 0, "annotations", "color"], "default"), 16 | (["numbered_list_item", 
"rich_text", 0, "annotations", "color"], "default"), 17 | (["bulleted_list_item", "rich_text", 0, "annotations", "color"], "default"), 18 | ] 19 | 20 | for (path, value) in del_keyvalue: 21 | assert NotionExporter.check_is_delete(path, value) 22 | 23 | keep_keyvalue = [ 24 | (["type"], "paragraph"), 25 | (["type"], "image"), 26 | (["object"], "block"), 27 | (["paragraph", "rich_text", 0, "text", "link"], "https://selfboot.com"), 28 | (["paragraph", "rich_text", 0, "annotations", "code"], True), 29 | (["paragraph", "rich_text", 0, "annotations", "color"], "red"), 30 | (["quote", "rich_text", 0, "annotations", "color"], "red"), 31 | (["numbered_list_item", "rich_text", 0, "annotations", "color"], "red"), 32 | (["bulleted_list_item", "rich_text", 0, "annotations", "color"], "red"), 33 | (["bulleted_list_item", "rich_text", 0, "annotations", "code"], True), 34 | ] 35 | for (path, value) in keep_keyvalue: 36 | assert not NotionExporter.check_is_delete(path, value) 37 | 38 | 39 | def test_export_blocks(): 40 | if 'GITHUB_ACTIONS' in os.environ: 41 | api_key = os.environ['notion_api_key'] 42 | page_id = os.environ['notion_page_id_1'] 43 | else: 44 | api_key = config['notion']['api_key'] 45 | page_id = config['notion']['page_id'] 46 | 47 | names = locals() 48 | page_sizes = [1, 5, 10, 100] 49 | for i in page_sizes: 50 | names['exporter_' + str(i)] = NotionExporter( 51 | api_key=api_key, 52 | page_id=page_id, 53 | page_size=i) 54 | 55 | names['exporter_' + str(i)].export_blocks() 56 | names['page_json_'+str(i)] = json.dumps(names['exporter_' + str(i)].output_blocks, indent=4, ensure_ascii=False) 57 | 58 | for i in page_sizes[1:]: 59 | if names['page_json_' + str(i)] != names['page_json_' + str(page_sizes[0])]: 60 | assert False 61 | 62 | 63 | if __name__ == '__main__': 64 | if 'GITHUB_ACTIONS' not in os.environ: 65 | from html2notion.utils import config, test_prepare_conf 66 | test_prepare_conf() 67 | 68 | test_check_is_delete() 69 | test_export_blocks() 70 | 
-------------------------------------------------------------------------------- /tests/test_reqlimit.py: -------------------------------------------------------------------------------- 1 | import json 2 | from html2notion.translate.html2json_yinxiang import Html2JsonYinXiang 3 | from html2notion.translate.import_stats import ImportStats 4 | 5 | 6 | block_max_conent = "Some words" * 200 7 | one_text_obj = { 8 | "plain_text": block_max_conent, 9 | "text": { 10 | "content": block_max_conent 11 | }, 12 | "type": "text" 13 | } 14 | remain_text_obj = { 15 | "plain_text": " more words", 16 | "text": { 17 | "content": " more words" 18 | }, 19 | "type": "text" 20 | } 21 | 22 | 23 | def test_reqlimit(): 24 | paragram_rich_block = [ 25 | { 26 | "object": "block", 27 | "type": "paragraph", 28 | "paragraph": { 29 | "rich_text": [ 30 | one_text_obj, one_text_obj, remain_text_obj 31 | ] 32 | } 33 | } 34 | ] 35 | 36 | paragram_rich_content = f'
{block_max_conent * 2} more words' 37 | import_stats = ImportStats() 38 | yinxiang = Html2JsonYinXiang(paragram_rich_content, import_stats) 39 | yinxiang.process() 40 | json_obj = yinxiang.children 41 | # print(json.dumps(json_obj, indent=4)) 42 | assert json_obj == paragram_rich_block 43 | 44 | 45 | def test_code_reqlimit(): 46 | code_rich_content = f'{block_max_conent * 2} more words' 47 | import_stats = ImportStats() 48 | yinxiang = Html2JsonYinXiang(code_rich_content, import_stats) 49 | yinxiang.process() 50 | json_obj = yinxiang.children 51 | # print(json.dumps(json_obj, indent=4)) 52 | 53 | split_block_result = [ 54 | { 55 | "object": "block", 56 | "type": "code", 57 | "code": { 58 | "rich_text": [ 59 | one_text_obj, one_text_obj, remain_text_obj 60 | ], 61 | "language": "plain text" 62 | } 63 | } 64 | ] 65 | assert json_obj == split_block_result 66 | 67 | 68 | if __name__ == '__main__': 69 | # test_reqlimit() 70 | test_code_reqlimit() 71 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from html2notion.utils import DateStrToISO8601, is_valid_url 3 | 4 | 5 | def test_date_to_ios8601(): 6 | valid_date_pair = ["2018-09-20 10:30:36 +0000", "2023-05-12 03:49:56 +0000"] 7 | 8 | for date_string in valid_date_pair: 9 | expect = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z").astimezone().isoformat() 10 | assert DateStrToISO8601(date_string) 11 | 12 | invalid_date_pair = ["2018-09-20 10:30", "2018-09-20 10:30:36", "2018-09-20 10:30:36+0800"] 13 | for date_string in invalid_date_pair: 14 | assert DateStrToISO8601(date_string) == "" 15 | 16 | 17 | def test_is_valid_url(): 18 | valid_urls = [ 19 | "http://www.example.com", 20 | "https://www.example.com", 21 | "ftp://www.example.com", 22 | "http://localhost", 23 | "http://127.0.0.1", 24 | "http://example.com/path?query#fragment", 25 | ] 26 | 
27 | invalid_urls = [ 28 | "example.com", 29 | "www.example.com", 30 | "http://", 31 | "http:///example.com", 32 | "http://example.com:80:80", # Two port numbers 33 | None, 34 | 123, # Non-string input 35 | "", 36 | ] 37 | 38 | for url in valid_urls: 39 | assert is_valid_url(url) == True, f"Expected {url} to be valid" 40 | 41 | for url in invalid_urls: 42 | assert is_valid_url(url) == False, f"Expected {url} to be invalid" 43 | 44 | 45 | if __name__ == '__main__': 46 | test_date_to_ios8601() 47 | --------------------------------------------------------------------------------