├── .github └── workflows │ ├── codeql-analysis.yml │ └── python-publish.yml ├── .gitignore ├── LICENSE ├── README.md ├── requirements.txt ├── setup.py ├── src └── notion_df │ ├── __init__.py │ ├── _pandas.py │ ├── agent.py │ ├── base.py │ ├── blocks.py │ ├── configs.py │ ├── constants.py │ ├── utils.py │ └── values.py └── tests ├── test_agent.py └── test_base.py /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '29 20 * * 6' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v2 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v1 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 
49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 54 | # If this step fails, then you should remove it and run the build manually (see below) 55 | - name: Autobuild 56 | uses: github/codeql-action/autobuild@v1 57 | 58 | # ℹ️ Command-line programs to run using the OS shell. 59 | # 📚 https://git.io/JvXDl 60 | 61 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 62 | # and modify them (or add more) to build your code if your project 63 | # uses a compiled language 64 | 65 | #- run: | 66 | # make bootstrap 67 | # make release 68 | 69 | - name: Perform CodeQL Analysis 70 | uses: github/codeql-action/analyze@v1 71 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | release-pypi: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: '3.x' 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools wheel twine 20 | - name: Build and publish 21 | env: 22 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} 23 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} 24 | run: | 25 | python setup.py sdist bdist_wheel 26 | twine upload dist/* 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.bak 2 | .gitattributes 3 | .last_checked 4 | 
.gitconfig 5 | *.bak 6 | *.log 7 | *~ 8 | ~* 9 | _tmp* 10 | tmp* 11 | tags 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | .hypothesis/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # SageMath parsed files 92 | *.sage.py 93 | 94 | # dotenv 95 | .env 96 | 97 | # virtualenv 98 | .venv 99 | venv/ 100 | ENV/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | 115 | .vscode 116 | *.swp 117 | 118 | # osx generated files 119 | .DS_Store 120 | .DS_Store? 
121 | .Trashes 122 | ehthumbs.db 123 | Thumbs.db 124 | .idea 125 | 126 | # pytest 127 | .pytest_cache 128 | 129 | # tools/trust-doc-nbs 130 | docs_src/.last_checked 131 | 132 | # symlinks to fastai 133 | docs_src/fastai 134 | tools/fastai 135 | 136 | # link checker 137 | checklink/cookies.txt 138 | 139 | # .gitconfig is now autogenerated 140 | .gitconfig 141 | 142 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Shannon Shen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `notion-df`: Seamlessly Connecting Notion Database with Pandas DataFrame 2 | 3 | *Please Note: This project is currently in pre-alpha stage. The code is not appropriately documented or tested. Please report any issues you find. Thanks!* 4 | 5 | ## Installation 6 | 7 | ```bash 8 | pip install notion-df 9 | ``` 10 | 11 | ## Usage 12 | 13 | - Before starting, please follow the instructions to [create a new integration](https://www.notion.so/my-integrations) and [add it to your Notion page or database](https://developers.notion.com/docs/getting-started#step-2-share-a-database-with-your-integration). 14 | - We'll refer to the `Internal Integration Token` as the `api_key` below. 15 | 16 | - Pandas-flavored APIs: you just need to add two additional lines of code: 17 | ```python 18 | import notion_df 19 | notion_df.pandas() #That's it! 20 | 21 | page_url = "paste your page url from Notion" 22 | api_key = "paste your api key (internal integration key)" 23 | 24 | import pandas as pd 25 | df = pd.read_notion(page_url, api_key=api_key) 26 | df.to_notion(page_url, api_key=api_key) 27 | ``` 28 | 29 | - Download your Notion table as a pandas DataFrame 30 | ```python 31 | import notion_df 32 | df = notion_df.download(notion_database_url, api_key=api_key) 33 | # Equivalent to: df = pd.read_notion(notion_database_url, api_key=api_key) 34 | df.head() 35 | ``` 36 |
37 | Only downloading the first `nrows` rows from a database 38 | 39 | ```python 40 | df = notion_df.download(notion_database_url, nrows=nrows) #e.g., 10 41 | ``` 42 | 43 |
44 | 45 |
46 | What if your table has a relation column? 47 | 48 | ```python 49 | df = notion_df.download(notion_database_url, 50 | resolve_relation_values=True) 51 | ``` 52 | Setting `resolve_relation_values=True` will automatically resolve the links for all the relation columns whose target can be accessed by the current notion integration. 53 | 54 | In detail, let's say the `"test"` column in df is a relation column in Notion. 55 | 1. When `resolve_relation_values=False`, the return results for that column will be a list of UUIDs of the target page: `['65e04f11-xxxx', 'b0ffcb4b-xxxx', ]`. 56 | 2. When `resolve_relation_values=True`, the return results for that column will be a list of regular strings corresponding to the name column of the target pages: `['page1', 'page2', ]`. 57 | 58 |
59 | 60 | - Append a local `df` to a Notion database: 61 | 62 | ```python 63 | import notion_df 64 | notion_df.upload(df, notion_database_url, title="page-title", api_key=api_key) 65 | # Equivalent to: df.to_notion(notion_database_url, title="page-title", api_key=api_key) 66 | ``` 67 | 68 | - Upload a local `df` to a newly created database in a Notion page: 69 | 70 | ```python 71 | import notion_df 72 | notion_df.upload(df, notion_page_url, title="page-title", api_key=api_key) 73 | # Equivalent to: df.to_notion(notion_page_url, title="page-title", api_key=api_key) 74 | ``` 75 | 76 | - Tired of typing `api_key=api_key` each time? 77 | 78 | ```python 79 | import notion_df 80 | notion_df.config(api_key=api_key) # Or set an environment variable `NOTION_API_KEY` 81 | df = notion_df.download(notion_database_url) 82 | notion_df.upload(df, notion_page_url, title="page-title") 83 | # Similarly in pandas APIs: df.to_notion(notion_page_url, title="page-title") 84 | ``` 85 | 86 | ## Development 87 | 88 | 1. Clone the repo and install the dependencies: 89 | ```bash 90 | git clone git@github.com:lolipopshock/notion-df.git 91 | cd notion-df 92 | pip install -e .[dev] 93 | ``` 94 | 2. How to run tests? 
95 | ```bash 96 | NOTION_API_KEY="" pytest tests/ 97 | ``` 98 | The tests depend on a set of Notion databases, specified by the following environment variables: 99 | 100 | | Environment Variable | Description | 101 | | --------------------------- | --------------------------------------- | 102 | | `NOTION_API_KEY` | The API key for your Notion integration | 103 | | `NOTION_ROLLUP_DF` | - | 104 | | `NOTION_FILES_DF` | - | 105 | | `NOTION_FORMULA_DF` | - | 106 | | `NOTION_RELATION_DF` | - | 107 | | `NOTION_RELATION_TARGET_DF` | - | 108 | | `NOTION_LONG_STRING_DF` | - | 109 | | `NOTION_RICH_TEXT_DF` | - | 110 | 111 | 112 | ## TODOs 113 | 114 | - [ ] Add tests for 115 | - [ ] `load` 116 | - [ ] `upload` 117 | - [ ] `values.py` 118 | - [ ] `configs.py` 119 | - [ ] `base.py` 120 | - [ ] Better class organizations/namings for `*Configs` and `*Values` 121 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | notion-client>=0.8.0 2 | pydantic~=1.9.0 3 | pandas 4 | dataclasses -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import os 3 | 4 | 5 | def get_requirements(req_file): 6 | reqs = [] 7 | with open(req_file, "r") as fp: 8 | for line in fp.readlines(): 9 | if line.startswith("#") or line.strip() == "": 10 | continue 11 | else: 12 | reqs.append(line.strip()) 13 | return reqs 14 | 15 | 16 | # A trick from https://github.com/jina-ai/jina/blob/79b302c93b01689e82cf4b52f46522eb7497c404/setup.py#L20 17 | libinfo_py = os.path.join("src", "notion_df", "__init__.py") 18 | libinfo_content = open(libinfo_py, "r", encoding="utf8").readlines() 19 | version_line = [l.strip() for l in libinfo_content if l.startswith("__version__")][0] 20 | exec(version_line) # gives __version__ 21
| 22 | setup( 23 | name="notion-df", 24 | version=__version__, 25 | description="Notion-DF: Seamlessly Connecting Notion Database with Pandas DataFrame", 26 | author="Zejiang Shen", 27 | author_email="zejiangshen@gmail.com", 28 | license="MIT", 29 | url="https://github.com/lolipopshock/notion-df", 30 | package_dir={"": "src"}, 31 | packages=find_packages("src"), 32 | long_description=open("README.md", "r", encoding="utf-8").read(), 33 | long_description_content_type="text/markdown", 34 | python_requires=">=3.6", 35 | install_requires=get_requirements("requirements.txt"), 36 | extras_require={ 37 | "dev": [ 38 | "black==21.12b0", 39 | "pytest", 40 | ], 41 | } 42 | ) -------------------------------------------------------------------------------- /src/notion_df/__init__.py: -------------------------------------------------------------------------------- 1 | from notion_df.agent import download, upload, config 2 | from notion_df._pandas import pandas 3 | 4 | __version__ = "0.0.5" 5 | -------------------------------------------------------------------------------- /src/notion_df/_pandas.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from notion_df import upload, download 3 | 4 | 5 | def read_notion( 6 | notion_url: str, 7 | nrows: Optional[int] = None, 8 | resolve_relation_values: bool = False, 9 | errors: str = "strict", 10 | api_key: str = None, 11 | ) -> "pd.DataFrame": 12 | """Download a Notion database as a pandas DataFrame. 13 | 14 | Args: 15 | notion_url (str): 16 | The URL of the Notion database to download from. 17 | nrows (int, optional): 18 | Number of rows of file to read. Useful for reading 19 | pieces of large files. 20 | resolve_relation_values (bool, optional): 21 | By default, when downloading relation columns, notion-df 22 | will just download the object ids. 
If set `resolve_relation_values` 23 | to `True`, notion-df will try to pull the values of the title 24 | column from the target table and map the object ids to those values. 25 | Defaults to False. 26 | errors (str, optional): 27 | You can specify how to handle errors during downloading. There 28 | are several options: 29 | 1. "strict": raise an error when there is one. 30 | 2. "ignore": ignore errors. 31 | 3. "warn": print the error message. 32 | Defaults to "strict". 33 | api_key (str, optional): 34 | The API key of the Notion integration. 35 | Defaults to None. 36 | Returns: 37 | pd.DataFrame: the loaded dataframe. 38 | """ 39 | return download( 40 | notion_url, 41 | nrows=nrows, 42 | resolve_relation_values=resolve_relation_values, 43 | errors=errors, 44 | api_key=api_key, 45 | ) 46 | 47 | 48 | def to_notion( 49 | self, 50 | notion_url: str, 51 | schema=None, 52 | mode: str = "a", 53 | title: str = "", 54 | title_col: str = "", 55 | errors: str = "strict", 56 | resolve_relation_values: bool = False, 57 | create_new_rows_in_relation_target: bool = False, 58 | return_response: bool = False, 59 | api_key: str = None, 60 | ): 61 | 62 | """Upload a dataframe to the specified Notion database. 63 | 64 | Args: 65 | df (pd.DataFrame): 66 | The dataframe to upload. 67 | notion_url (str): 68 | The URL of the Notion page to upload to. 69 | If it is a notion page, then it will create a new database 70 | under that page and upload the dataframe to it. 71 | schema (DatabaseSchema, optional): 72 | The schema of the Notion database. 73 | When not set, it will be inferred from (1) the target 74 | notion database (if it is) then (2) the dataframe itself. 75 | mode (str, optional): 76 | (the function is not supported yet.) 77 | Whether to append to the database or overwrite. 78 | Defaults to "a". 79 | title (str, optional): 80 | The title of the Notion database. 81 | Defaults to "". 82 | title_col (str, optional): 83 | Every Notion database requires a "title" column. 
84 | When the schema is not set, by default it infers the first 85 | column of uploaded dataframe as the title column. You can 86 | set this value to specify the title column. 87 | Defaults to "". 88 | errors (str, optional): 89 | Since we upload the dataframe to Notion row by row, you 90 | can specify how to handle errors during uploading. There 91 | are several options: 92 | 1. "strict": raise an error when there is one. 93 | 2. "ignore": ignore errors and continue uploading 94 | subsequent rows. 95 | 3. "warn": print the error message and continue uploading 96 | Defaults to "strict". 97 | resolve_relation_values (bool, optional): 98 | If `True`, notion-df assumes the items in any relation columns 99 | are not notion object ids, but the value of the corresponding 100 | "title column" in the target table. It will try to convert the 101 | relation column to notion object ids by looking up the value. 102 | Defaults to False. 103 | create_new_rows_in_relation_target (bool, optional): 104 | This argument is used in conjunction with `resolve_relation_values`. 105 | If True, then notion-df will try to create new rows in the target 106 | the relation table if the relation column value is not found there. 107 | Defaults to False. 108 | return_response (bool, optional): 109 | If True, then the function will return a list of responses for 110 | the updates from Notion. 111 | api_key (str, optional): 112 | The API key of the Notion integration. 113 | Defaults to None. 
114 | """ 115 | 116 | return upload( 117 | df=self, 118 | notion_url=notion_url, 119 | schema=schema, 120 | mode=mode, 121 | title=title, 122 | title_col=title_col, 123 | errors=errors, 124 | resolve_relation_values=resolve_relation_values, 125 | create_new_rows_in_relation_target=create_new_rows_in_relation_target, 126 | return_response=return_response, 127 | api_key=api_key, 128 | ) 129 | 130 | 131 | def pandas(): 132 | import pandas as pd 133 | 134 | pd.read_notion = read_notion 135 | pd.DataFrame.to_notion = to_notion 136 | -------------------------------------------------------------------------------- /src/notion_df/agent.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional, Union, Tuple 2 | from datetime import datetime 3 | import warnings 4 | import os 5 | from functools import wraps 6 | 7 | import pandas as pd 8 | from httpx import HTTPStatusError 9 | from notion_client import Client 10 | from notion_client.helpers import get_id 11 | 12 | from notion_df.values import PageProperties, PageProperty 13 | from notion_df.configs import DatabaseSchema, NON_EDITABLE_TYPES 14 | from notion_df.utils import is_uuid, flatten_dict 15 | from notion_df.blocks import parse_blocks, BaseNotionBlock 16 | 17 | API_KEY = None 18 | NOT_REVERSE_DATAFRAME = -1 19 | # whether to reverse the dataframe when performing uploading. 20 | # for some reason, notion will reverse the order of dataframe 21 | # when uploading. 
22 | # -1 for reversing, 1 for not reversing 23 | NOTION_DEFAULT_PAGE_SIZE = 100 24 | NOTION_MAX_PAGE_SIZE = 100 25 | 26 | 27 | def config(api_key: str): 28 | global API_KEY 29 | API_KEY = api_key 30 | 31 | 32 | def _load_api_key(api_key: str) -> str: 33 | if api_key is not None: 34 | return api_key 35 | elif API_KEY is not None: 36 | return API_KEY 37 | elif os.environ.get("NOTION_API_KEY") is not None: 38 | return os.environ.get("NOTION_API_KEY") 39 | else: 40 | raise ValueError("No API key provided") 41 | 42 | 43 | def _is_notion_database(notion_url): 44 | return "?v=" in notion_url.split("/")[-1] 45 | 46 | 47 | def use_client(func): 48 | @wraps(func) 49 | def wrapper(*args, **kwargs): 50 | orig_client = client = kwargs.pop("client", None) 51 | 52 | if client is None: 53 | api_key = _load_api_key(kwargs.pop("api_key", None)) 54 | client = Client(auth=api_key) 55 | out = func(client=client, *args, **kwargs) 56 | 57 | if orig_client is None: 58 | # Automatically close the client if it was not passed in 59 | client.close() 60 | return out 61 | 62 | return wrapper 63 | 64 | 65 | def query_database( 66 | database_id: str, 67 | client: Client, 68 | start_cursor: Optional[str] = None, 69 | page_size: int = NOTION_DEFAULT_PAGE_SIZE, 70 | ): 71 | query_dict = {"database_id": database_id, "page_size": page_size} 72 | if start_cursor is not None: 73 | query_dict["start_cursor"] = start_cursor 74 | # For now, Notion API doesn't allow start_cursor='null' 75 | 76 | query_results = client.databases.query(**query_dict) 77 | 78 | assert query_results["object"] == "list" 79 | return query_results 80 | 81 | 82 | def load_df_from_queries( 83 | database_query_results: List[Dict], 84 | ): 85 | properties = PageProperties.from_raw(database_query_results) 86 | df = properties.to_frame() 87 | 88 | with warnings.catch_warnings(): 89 | warnings.simplefilter("ignore") 90 | # TODO: figure out a better solution 91 | # When doing the following, Pandas may think you are trying 92 | # to add a
new column to the dataframe; it will show the warnings, 93 | # but it will not actually add the column. So we use catch_warnings 94 | # to hide the warnings. 95 | # However this might not be the best way to do so. Some alternatives 96 | # include setting df.attrs https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.attrs.html 97 | # Or even use something like multi-level index for saving notion_ids. 98 | # Nevertheless, all of them seems not that perfect -- for example, 99 | # after copying or slicing, the values will disappear. 100 | # Should try to figure out a better solution in the future. 101 | df.notion_urls = pd.Series([ele["url"] for ele in database_query_results]) 102 | df.notion_ids = pd.Series([ele["id"] for ele in database_query_results]) 103 | df.notion_query_results = database_query_results 104 | # TODO: Rethink if this should be private 105 | 106 | return df 107 | 108 | 109 | def download_df_from_database( 110 | notion_url: str, 111 | client: Client, 112 | nrows: Optional[int] = None, 113 | errors: str = "strict", 114 | ) -> pd.DataFrame: 115 | """Download a Notion database as a pandas DataFrame. 116 | 117 | Args: 118 | notion_url (str): 119 | The URL of the Notion database to download from. 120 | nrows (int, optional): 121 | Number of rows of file to read. Useful for reading 122 | pieces of large files. 123 | api_key (str, optional): 124 | The API key of the Notion integration. 125 | Defaults to None. 126 | client (Client, optional): 127 | The notion client. 128 | Defaults to None. 129 | Returns: 130 | pd.DataFrame: the loaded dataframe. 
131 | """ 132 | if not is_uuid(notion_url): 133 | assert _is_notion_database(notion_url) 134 | database_id = get_id(notion_url) 135 | else: 136 | database_id = notion_url 137 | 138 | # Check the if the id is a database first 139 | try: 140 | retrieve_results = client.databases.retrieve(database_id=database_id) 141 | schema = DatabaseSchema.from_raw(retrieve_results["properties"]) 142 | except HTTPStatusError: 143 | error_msg = ( 144 | f"The object {database_id} might not be a notion database, " 145 | "or integration associated with the API key don't have access " 146 | "to it." 147 | ) 148 | if errors == "strict": 149 | raise ValueError(error_msg) 150 | elif errors == "warn": 151 | warnings.warn(error_msg) 152 | return None 153 | elif errors == "ignore": 154 | return None 155 | 156 | downloaded_rows = [] 157 | 158 | page_size = NOTION_MAX_PAGE_SIZE 159 | if nrows is not None: 160 | if nrows <= NOTION_MAX_PAGE_SIZE: 161 | page_size = nrows 162 | 163 | query_results = query_database(database_id, client, page_size=page_size) 164 | downloaded_rows.extend(query_results["results"]) 165 | 166 | while query_results["has_more"]: 167 | if nrows is not None: 168 | if len(downloaded_rows) >= nrows: 169 | break 170 | else: 171 | page_size = nrows - len(downloaded_rows) 172 | else: 173 | page_size = NOTION_MAX_PAGE_SIZE 174 | 175 | query_results = query_database( 176 | database_id, 177 | client, 178 | start_cursor=query_results["next_cursor"], 179 | page_size=page_size, 180 | ) 181 | downloaded_rows.extend(query_results["results"]) 182 | 183 | df = load_df_from_queries(downloaded_rows) 184 | df = schema.create_df(df) 185 | return df 186 | 187 | 188 | @use_client 189 | def download( 190 | notion_url: str, 191 | nrows: Optional[int] = None, 192 | resolve_relation_values: Optional[bool] = False, 193 | errors: str = "strict", 194 | *, 195 | api_key: str = None, 196 | client: Client = None, 197 | ): 198 | df = download_df_from_database( 199 | notion_url=notion_url, 200 | nrows=nrows, 
201 | client=client, 202 | errors=errors, 203 | ) 204 | if resolve_relation_values: 205 | for col in df.columns: 206 | if df.schema[col].type == "relation": 207 | relation_df = download_df_from_database( 208 | df.schema[col].relation.database_id, 209 | errors="warn", 210 | client=client, 211 | ) 212 | if relation_df is not None: 213 | rel_title_col = relation_df.schema.title_column 214 | obj_id_to_string = { 215 | obj_id: obj_title 216 | for obj_id, obj_title in zip( 217 | relation_df.notion_ids, relation_df[rel_title_col] 218 | ) 219 | } 220 | df[col] = df[col].apply( 221 | lambda row: [obj_id_to_string[ele] for ele in row] 222 | ) 223 | return df 224 | 225 | 226 | def create_database( 227 | page_id: str, client: Client, schema: DatabaseSchema, title: str = "" 228 | ): 229 | response = client.databases.create( 230 | parent={"type": "page_id", "page_id": page_id}, 231 | title=[{"type": "text", "text": {"content": title}}], 232 | properties=schema.query_dict(), 233 | ) 234 | assert response["object"] == "database" 235 | return response 236 | 237 | 238 | def upload_row_to_database(row, database_id, schema, children, client) -> Dict: 239 | 240 | properties = PageProperty.from_series(row, schema).query_dict() 241 | if children: 242 | if not isinstance(children, list): 243 | children = [children] 244 | for cid in range(len(children)): 245 | if isinstance(children[cid], BaseNotionBlock): 246 | children[cid] = flatten_dict(children[cid].dict()) 247 | 248 | response = client.pages.create( 249 | parent={"database_id": database_id}, properties=properties, children=children 250 | ) 251 | else: 252 | response = client.pages.create( 253 | parent={"database_id": database_id}, properties=properties, 254 | ) 255 | return response 256 | 257 | 258 | def upload_to_database(df, databse_id, schema, client, errors, children) -> List[Dict]: 259 | all_response = [] 260 | if children is not None: 261 | assert len(children) == len(df) 262 | children = children[::NOT_REVERSE_DATAFRAME] 263 | 
264 | for idx, (_, row) in enumerate(df[::NOT_REVERSE_DATAFRAME].iterrows(), ): 265 | try: 266 | child = children[idx] if children is not None else None 267 | response = upload_row_to_database(row, databse_id, schema, child, client) 268 | all_response.append(response) 269 | except Exception as e: 270 | if errors == "strict": 271 | raise e 272 | elif errors == "warn": 273 | warnings.warn(f"Encountered errors {e} while uploading row: {row}") 274 | elif errors == "ignore": 275 | continue 276 | return all_response[::NOT_REVERSE_DATAFRAME] 277 | 278 | 279 | def load_database_schema(database_id, client): 280 | return DatabaseSchema.from_raw( 281 | client.databases.retrieve(database_id=database_id)["properties"] 282 | ) 283 | 284 | 285 | @use_client 286 | def upload( 287 | df: pd.DataFrame, 288 | notion_url: str, 289 | schema: DatabaseSchema = None, 290 | mode: str = "a", 291 | title: str = "", 292 | title_col: str = "", 293 | errors: str = "strict", 294 | resolve_relation_values: bool = False, 295 | create_new_rows_in_relation_target: bool = False, 296 | children: List[Union[Dict, BaseNotionBlock]] = None, 297 | return_response: bool = False, 298 | *, 299 | api_key: str = None, 300 | client: Client = None, 301 | ) -> Union[str, Tuple[str, List[Dict]]]: 302 | """Upload a dataframe to the specified Notion database. 303 | 304 | Args: 305 | df (pd.DataFrame): 306 | The dataframe to upload. 307 | notion_url (str): 308 | The URL of the Notion page to upload to. 309 | If it is a notion page, then it will create a new database 310 | under that page and upload the dataframe to it. 311 | schema (DatabaseSchema, optional): 312 | The schema of the Notion database. 313 | When not set, it will be inferred from (1) the target 314 | notion database (if it is) then (2) the dataframe itself. 315 | mode (str, optional): 316 | (the function is not supported yet.) 317 | Whether to append to the database or overwrite. 318 | Defaults to "a". 
319 | title (str, optional): 320 | The title of the Notion database. 321 | Defaults to "". 322 | title_col (str, optional): 323 | Every Notion database requires a "title" column. 324 | When the schema is not set, by default it infers the first 325 | column of uploaded dataframe as the title column. You can 326 | set this value to specify the title column. 327 | Defaults to "". 328 | errors (str, optional): 329 | Since we upload the dataframe to Notion row by row, you 330 | can specify how to handle errors during uploading. There 331 | are several options: 332 | 1. "strict": raise an error when there is one. 333 | 2. "ignore": ignore errors and continue uploading 334 | subsequent rows. 335 | 3. "warn": print the error message and continue uploading 336 | Defaults to "strict". 337 | children (List[Union[Dict, BaseNotionBlock]], optional): 338 | The corresponding children of the uploaded Notion page. It should be 339 | a list of the same length as the dataframe. 340 | resolve_relation_values (bool, optional): 341 | If `True`, notion-df assumes the items in any relation columns 342 | are not notion object ids, but the value of the corresponding 343 | "title column" in the target table. It will try to convert the 344 | relation column to notion object ids by looking up the value. 345 | Defaults to False. 346 | create_new_rows_in_relation_target (bool, optional): 347 | This argument is used in conjunction with `resolve_relation_values`. 348 | If True, then notion-df will try to create new rows in the target 349 | the relation table if the relation column value is not found there. 350 | Defaults to False. 351 | return_response (bool, optional): 352 | If True, then the function will return a list of responses for 353 | the updates from Notion. 354 | api_key (str, optional): 355 | The API key of the Notion integration. 356 | Defaults to None. 357 | client (Client, optional): 358 | The notion client. 359 | Defaults to None. 
360 | """ 361 | if schema is None: 362 | if hasattr(df, "schema"): 363 | schema = df.schema 364 | 365 | if not _is_notion_database(notion_url): 366 | if schema is None: 367 | schema = DatabaseSchema.from_df(df, title_col=title_col) 368 | database_properties = create_database(get_id(notion_url), client, schema, title) 369 | databse_id = database_properties["id"] 370 | notion_url = database_properties["url"] 371 | else: 372 | databse_id = get_id(notion_url) 373 | if schema is None: 374 | schema = load_database_schema(databse_id, client) 375 | 376 | # At this stage, we should have the appropriate schema 377 | assert schema is not None 378 | 379 | if not schema.is_df_compatible(df): 380 | raise ValueError( 381 | "The dataframe is not compatible with the database schema." 382 | "The df contains columns that are not in the databse: " 383 | + f"{[col for col in df.columns if col not in schema.configs.keys()]}" 384 | ) 385 | 386 | if mode not in ("a", "append"): 387 | raise NotImplementedError 388 | # TODO: clean the current values in the notion database (if any) 389 | 390 | df = schema.transform(df, remove_non_editables=True) 391 | 392 | # Assumes the notion database is created and has the appropriate schema 393 | if resolve_relation_values: 394 | for col in df.columns: 395 | if schema[col].type == "relation": 396 | 397 | if df[col].apply(lambda row: all([is_uuid(ele) for ele in row])).all(): 398 | # The column is all in uuid, we don't need to resolve it 399 | continue 400 | 401 | # Try to download the target_relation_df 402 | relation_db_id = schema[col].relation.database_id 403 | relation_df = download_df_from_database( 404 | relation_db_id, 405 | errors="warn", 406 | client=client, 407 | ) 408 | 409 | if relation_df is not None: 410 | rel_title_col = relation_df.schema.title_column 411 | obj_string_to_id = { 412 | obj_title: obj_id 413 | for obj_id, obj_title in zip( 414 | relation_df.notion_ids, relation_df[rel_title_col] 415 | ) 416 | } 417 | 418 | 
all_unique_obj_strings_in_relation_df = set( 419 | relation_df[rel_title_col].tolist() 420 | ) 421 | all_unique_obj_strings_in_df = set(sum(df[col].tolist(), [])) 422 | # This assumes the column has been transformed to a list of lists; 423 | # which is a true assumption given the transformation for the relation 424 | # column (LIST_TRANSFORM). 425 | new_object_strings = all_unique_obj_strings_in_df.difference( 426 | all_unique_obj_strings_in_relation_df 427 | ) 428 | 429 | if create_new_rows_in_relation_target and len(new_object_strings) > 0: 430 | new_relation_df = pd.DataFrame( 431 | list(new_object_strings), columns=[rel_title_col] 432 | ) 433 | responses = upload_to_database( 434 | new_relation_df, 435 | relation_db_id, 436 | relation_df.schema, 437 | client, 438 | "warn", 439 | ) 440 | appended_relation_df = load_df_from_queries(responses) 441 | obj_string_to_id.update( 442 | { 443 | obj_title: obj_id 444 | for obj_id, obj_title in zip( 445 | appended_relation_df.notion_ids, 446 | appended_relation_df[rel_title_col], 447 | ) 448 | } 449 | ) 450 | 451 | df[col] = df[col].apply( 452 | lambda row: [obj_string_to_id[ele] for ele in row if ele in obj_string_to_id] 453 | ) 454 | 455 | response = upload_to_database(df, databse_id, schema, client, errors, children) 456 | 457 | print(f"Your dataframe has been uploaded to the Notion page: {notion_url} .") 458 | if return_response: 459 | return notion_url, response 460 | return notion_url 461 | 462 | @use_client 463 | def download_page_children( 464 | notion_url: str, 465 | api_key: str = None, 466 | client: Client = None, 467 | ): 468 | """Download the children of a Notion page. 469 | 470 | Args: 471 | notion_url (str): 472 | The url of the Notion page. 473 | api_key (str, optional): 474 | The API key of the Notion integration. 475 | Defaults to None. 476 | client (Client, optional): 477 | The notion client. 478 | Defaults to None. 
    """
    page_id = get_id(notion_url)
    r = client.blocks.children.list(block_id=page_id)
    return parse_blocks(r['results'], recursive=True, client=client)
--------------------------------------------------------------------------------
 /src/notion_df/base.py:
--------------------------------------------------------------------------------
from typing import List, Dict, Optional, Any
from enum import Enum
from pydantic import BaseModel, validator, root_validator
import pandas as pd

from notion_df.utils import is_time_string, is_uuid
from notion_df.constants import RICH_TEXT_CONTENT_MAX_LENGTH

### All colors supported in NOTION


class NotionColorEnum(str, Enum):
    # Basic color names Notion accepts, e.g. for select options.
    Default = "default"
    Gray = "gray"
    Brown = "brown"
    Orange = "orange"
    Yellow = "yellow"
    Green = "green"
    Blue = "blue"
    Purple = "purple"
    Pink = "pink"
    Red = "red"


class NotionExtendedColorEnum(str, Enum):
    # Same palette as NotionColorEnum plus the "*_background" variants
    # used by rich-text annotations.
    Default = "default"
    Gray = "gray"
    Brown = "brown"
    Orange = "orange"
    Yellow = "yellow"
    Green = "green"
    Blue = "blue"
    Purple = "purple"
    Pink = "pink"
    Red = "red"
    GrayBackground = "gray_background"
    BrownBackground = "brown_background"
    OrangeBackground = "orange_background"
    YellowBackground = "yellow_background"
    GreenBackground = "green_background"
    BlueBackground = "blue_background"
    PurpleBackground = "purple_background"
    PinkBackground = "pink_background"
    RedBackground = "red_background"


class RichTextTypeEnum(str, Enum):
    # The three rich-text element kinds in the Notion API.
    Text = "text"
    Mention = "mention"
    Equation = "equation"


class SelectOption(BaseModel):
    """One option of a (multi-)select property."""

    id: Optional[str]
    name: str
    color: Optional[NotionColorEnum]

    @classmethod
    def from_value(cls, value: str):
        """Build an option from its display name only."""
        return cls(name=value)

    @validator("name")
    def name_cannot_contain_comma(cls, v):
        # Notion rejects option names that contain commas.
        if "," in v:
            raise ValueError(f"Invalid option name {v} that contains comma")
        return v


class SelectOptions(BaseModel):
    """The option list carried by a select/multi_select configuration."""

    options: Optional[List[SelectOption]]

    @classmethod
    def from_value(cls, values: List[str]):
        return cls(options=[SelectOption.from_value(value) for value in values])


class RelationObject(BaseModel):
    """A reference to a page in a related database, by page id."""

    id: str
    # TODO: Change this to UUID validation

    @classmethod
    def from_value(cls, value: str):
        return cls(id=value)

    @validator("id")
    def id_must_be_uuid(cls, v):
        if not is_uuid(v):
            raise ValueError(f"Invalid id {v}")
        return v


class UserObject(BaseModel):
    """A Notion user reference (used by people properties)."""

    object: str = "user"
    id: str
    type: Optional[str]
    name: Optional[str]
    avatar_url: Optional[str]

    @classmethod
    def from_value(cls, value: str):
        return cls(id=value)

    @validator("object")
    def object_is_name(cls, v):
        # The `object` discriminator must literally be "user".
        if v != "user":
            raise ValueError(f"Invalid user object value {v}")
        return v

    @property
    def value(self):
        return self.name


class NumberFormat(BaseModel):
    format: str


class FormulaProperty(BaseModel):
    expression: str


class RelationProperty(BaseModel):
    """Configuration payload for a relation column."""

    database_id: str
    # TODO: Change this to UUID validation
    synced_property_name: Optional[str]
    synced_property_id: Optional[str]


class DateObject(BaseModel):
    """A Notion date value: a start, an optional end, optional time zone."""

    start: Optional[str] = None
    end: Optional[str] = None
    time_zone: Optional[str] = None

    @validator("start")
    def is_start_ISO8601(cls, v):
        # TODO: Currently it cannot suport time ranges
        if v is not None:
            if not is_time_string(v):
                raise ValueError(
                    "The data start is not appropriately formatted as an ISO 8601 date string."
                )
        return v

    @validator("end")
    def is_end_ISO8601(cls, v):
        if v is not None:
            if not is_time_string(v):
                raise ValueError(
                    "The data end is not appropriately formatted as an ISO 8601 date string."
                )
        return v

    @classmethod
    def from_value(cls, value: str):
        return cls(start=value)
        # TODO: Now we assume the value has already been formated as strings
        # But we should parse them into appropriate formats.

    @property
    def value(self):
        return pd.to_datetime(self.start)
        # TODO: what should the data structure be if self.end is not None?


class RollupProperty(BaseModel):
    """Configuration payload for a rollup column."""

    relation_property_name: Optional[str]
    relation_property_id: Optional[str]
    rollup_property_name: Optional[str]
    rollup_property_id: Optional[str]
    function: str
    # TODO: Change this to ENUM - https://developers.notion.com/reference/create-a-database#rollup-configuration


class RollupObject(BaseModel):
    """A rollup property *value*; the payload lives under the field named by `type`."""

    type: str
    # TODO: Change this to ENUM - https://developers.notion.com/reference/property-value-object#rollup-property-values
    number: Optional[float]
    date: Optional[DateObject]
    array: Optional[List[Any]]
    # Based on the description in https://developers.notion.com/reference/property-value-object#rollup-property-value-element
    # Each element is exactly like property value object, but without the "id" key.
    # As there's a preprocess step in RollupValues, each item of the array must
    # be a property value object.
    function: Optional[str]
    # Though the function param doesn't appear in the documentation, it exists
    # in the return values of the API. Set it as optional for future compatibility.
    # TODO: check in the future if the function param should be updated.
    @validator("type")
    def ensure_non_empty_data(cls, v):
        # Only these three carriers are valid rollup payload types.
        data_type = v
        if data_type is None:
            raise ValueError("RollupObject must have a type.")
        if data_type not in ["number", "date", "array"]:
            raise ValueError(f"RollupObject type {data_type} is invalid.")
        return v

    @property
    def value(self):
        # Dispatch on `type` to the matching payload field.
        if self.type == "number":
            return self.number
        if self.type == "date":
            if self.date is not None:
                return self.date.value
        if self.type == "array":
            return [ele.value for ele in self.array]


class FileTargetObject(BaseModel):
    """The url payload of a file, either Notion-hosted or external."""

    url: str
    expiry_time: Optional[str]

    @property
    def value(self):
        return self.url


class FileObject(BaseModel):
    name: Optional[str]  # TODO: Figure out why this is not required...
    type: str
    file: Optional[FileTargetObject]
    external: Optional[FileTargetObject]

    @property
    def value(self):
        # type == "file" -> Notion-hosted; otherwise fall back to external.
        if self.type == "file":
            if self.file is not None:
                return self.file.value
        else:
            if self.external is not None:
                return self.external.value


class FormulaObject(BaseModel):
    """A computed formula value; the payload lives under the field named by `type`."""

    type: str
    string: Optional[str]
    number: Optional[float]
    boolean: Optional[bool]
    date: Optional[DateObject]

    @property
    def value(self):
        if self.type == "string":
            return self.string
        elif self.type == "number":
            return self.number
        elif self.type == "boolean":
            return self.boolean
        elif self.type == "date":
            if self.date is not None:
                return self.date.value


class AnnotationObject(BaseModel):
    """Styling flags attached to a rich-text element."""

    bold: bool
    italic: bool
    strikethrough: bool
    underline: bool
    code: bool
    color: NotionExtendedColorEnum


class TextLinkObject(BaseModel):
    type: Optional[str] = "url"
    url: str


class TextObject(BaseModel):
    content: str
    link: Optional[TextLinkObject]


class PageReferenceObject(BaseModel):
    id: str


class LinkPreviewMentionObject(BaseModel):
    url: str


class MentionObject(BaseModel):
    """An @-mention inside rich text; payload field selected by `type`."""

    type: str
    user: Optional[UserObject]
    page: Optional[PageReferenceObject]
    database: Optional[PageReferenceObject]
    date: Optional[DateObject]
    link_preview: Optional[LinkPreviewMentionObject]


class EquationObject(BaseModel):
    expression: str


class BaseRichTextObject(BaseModel):
    plain_text: Optional[str]
    # TODO: The Optional[plain_text] is used when creating property values
    href: Optional[str] = None
    annotations: Optional[AnnotationObject] = None
    type: Optional[RichTextTypeEnum]

    @property
    def value(self):
        return self.plain_text


class RichTextObject(BaseRichTextObject):
    """A full rich-text element: text, mention, or equation."""

    text: Optional[TextObject]
    mention: Optional[MentionObject]
    equation: Optional[EquationObject]

    @classmethod
    def from_value(cls, value: str):
        return cls(text=TextObject(content=value))

    @classmethod
    def encode_string(cls, value: str) -> List["RichTextObject"]:
        # Notion caps each rich_text element at RICH_TEXT_CONTENT_MAX_LENGTH
        # characters, so long strings are split into consecutive chunks.
        chunk_size = RICH_TEXT_CONTENT_MAX_LENGTH
        return [
            cls(text=TextObject(content=value[idx : idx + chunk_size]))
            for idx in range(0, len(value), chunk_size)
        ]


class EmojiObject(BaseModel):
    type: str = "emoji"
    emoji: str
--------------------------------------------------------------------------------
 /src/notion_df/blocks.py:
--------------------------------------------------------------------------------
import warnings
from typing import List, Union, Dict, Any, Tuple, Optional, Union

from notion_client import Client
from pydantic import BaseModel, parse_obj_as, validator, root_validator
6 | 7 | from notion_df.base import ( 8 | RichTextObject, 9 | SelectOption, 10 | DateObject, 11 | RelationObject, 12 | UserObject, 13 | RollupObject, 14 | FileObject, 15 | EmojiObject, 16 | FormulaObject, 17 | NotionExtendedColorEnum, 18 | ) 19 | 20 | 21 | class ParentObject(BaseModel): 22 | type: str 23 | database_id: Optional[str] 24 | page_id: Optional[str] 25 | workspace: Optional[bool] 26 | block_id: Optional[str] 27 | 28 | 29 | # BaseClasses 30 | class BaseAttributes(BaseModel): 31 | pass 32 | 33 | 34 | class BaseAttributeWithChildren(BaseModel): 35 | children: Optional[List["BaseNotionBlock"]] 36 | 37 | 38 | class TextBlockAttributes(BaseAttributeWithChildren): 39 | rich_text: List[RichTextObject] 40 | color: Optional[NotionExtendedColorEnum] 41 | 42 | 43 | class HeadingBlockAttributes(BaseAttributeWithChildren): 44 | rich_text: List[RichTextObject] 45 | color: Optional[NotionExtendedColorEnum] 46 | is_toggleable: bool 47 | # Whether or not the heading block is a toggle heading or not. If true, the heading block has toggle and can support children. If false, the heading block is a normal heading block. 
48 | 49 | 50 | class CalloutBlockAttributes(BaseAttributeWithChildren): 51 | rich_text: List[RichTextObject] 52 | icon: Optional[Union[FileObject, EmojiObject]] 53 | color: Optional[NotionExtendedColorEnum] 54 | 55 | 56 | class ToDoBlockAttributes(BaseAttributeWithChildren): 57 | rich_text: List[RichTextObject] 58 | color: Optional[NotionExtendedColorEnum] 59 | checked: Optional[bool] 60 | 61 | 62 | class CodeBlockAttributes(BaseAttributes): 63 | rich_text: List[RichTextObject] 64 | caption: Optional[List[RichTextObject]] 65 | language: Optional[str] # TODO: it's actually an enum 66 | 67 | 68 | class ChildPageAttributes(BaseAttributes): 69 | title: List[RichTextObject] 70 | 71 | 72 | class EmbedBlockAttributes(BaseAttributes): 73 | url: str 74 | 75 | 76 | class ImageBlockAttributes(BaseAttributes, FileObject): 77 | caption: Optional[List[RichTextObject]] 78 | # This is not listed in the docs, but it is in the API response (Nov 2022) 79 | 80 | 81 | class VideoBlockAttributes(BaseAttributes): 82 | video: FileObject 83 | 84 | 85 | class FileBlockAttributes(BaseAttributes): 86 | file: FileObject 87 | caption: Optional[List[RichTextObject]] 88 | 89 | 90 | class PdfBlockAttributes(BaseAttributes): 91 | pdf: FileObject 92 | 93 | 94 | class BookmarkBlockAttributes(BaseAttributes): 95 | url: str 96 | caption: Optional[List[RichTextObject]] 97 | 98 | 99 | class EquationBlockAttributes(BaseAttributes): 100 | expression: str 101 | 102 | 103 | class TableOfContentsAttributes(BaseAttributes): 104 | color: Optional[NotionExtendedColorEnum] 105 | 106 | 107 | class LinkPreviewAttributes(BaseAttributes): 108 | url: str 109 | 110 | 111 | class LinkToPageAttributes(BaseAttributes): 112 | type: str 113 | page_id: Optional[str] 114 | database_id: Optional[str] 115 | 116 | 117 | ATTRIBUTES_MAPPING = { 118 | _cls.__name__: _cls 119 | for _cls in BaseAttributes.__subclasses__() 120 | + BaseAttributeWithChildren.__subclasses__() 121 | } 122 | 123 | 124 | class BaseNotionBlock(BaseModel): 
125 | object: str = "block" 126 | parent: Optional[ParentObject] 127 | id: Optional[str] 128 | type: Optional[str] 129 | created_time: Optional[str] 130 | # created_by 131 | last_edited_time: Optional[str] 132 | # created_by 133 | has_children: Optional[bool] 134 | archived: Optional[bool] 135 | type: str 136 | 137 | @property 138 | def children(self): 139 | return self.__getattribute__(self.type).children 140 | 141 | def set_children(self, value: Any): 142 | self.__getattribute__(self.type).children = value 143 | 144 | 145 | class ParagraphBlock(BaseNotionBlock): 146 | type: str = "paragraph" 147 | paragraph: TextBlockAttributes 148 | 149 | 150 | class HeadingOneBlock(BaseNotionBlock): 151 | type: str = "heading_1" 152 | heading_1: HeadingBlockAttributes 153 | 154 | 155 | class HeadingTwoBlock(BaseNotionBlock): 156 | type: str = "heading_2" 157 | heading_2: HeadingBlockAttributes 158 | 159 | 160 | class HeadingThreeBlock(BaseNotionBlock): 161 | type: str = "heading_3" 162 | heading_3: HeadingBlockAttributes 163 | 164 | 165 | class CalloutBlock(BaseNotionBlock): 166 | type: str = "callout" 167 | callout: CalloutBlockAttributes 168 | 169 | 170 | class QuoteBlock(BaseNotionBlock): 171 | type: str = "quote" 172 | quote: TextBlockAttributes 173 | 174 | 175 | class BulletedListItemBlock(BaseNotionBlock): 176 | type: str = "bulleted_list_item" 177 | bulleted_list_item: TextBlockAttributes 178 | 179 | 180 | class NumberedListItemBlock(BaseNotionBlock): 181 | type: str = "numbered_list_item" 182 | numbered_list_item: TextBlockAttributes 183 | 184 | 185 | class ToDoBlock(BaseNotionBlock): 186 | type: str = "to_do" 187 | to_do: ToDoBlockAttributes 188 | 189 | 190 | class ToggleBlock(BaseNotionBlock): 191 | type: str = "toggle" 192 | toggle: TextBlockAttributes 193 | 194 | 195 | class CodeBlock(BaseNotionBlock): 196 | type: str = "code" 197 | code: CodeBlockAttributes 198 | 199 | 200 | class ChildPageBlock(BaseNotionBlock): 201 | type: str = "child_page" 202 | child_page: 
ChildPageAttributes 203 | 204 | 205 | class ChildDatabaseBlock(BaseNotionBlock): 206 | type: str = "child_database" 207 | child_database: ChildPageAttributes 208 | 209 | 210 | class EmbedBlock(BaseNotionBlock): 211 | type: str = "embed" 212 | embed: EmbedBlockAttributes 213 | 214 | 215 | class ImageBlock(BaseNotionBlock): 216 | type: str = "image" 217 | image: ImageBlockAttributes 218 | 219 | 220 | class VideoBlock(BaseNotionBlock): 221 | type: str = "video" 222 | video: VideoBlockAttributes 223 | 224 | 225 | class FileBlock(BaseNotionBlock): 226 | type: str = "file" 227 | file: FileBlockAttributes 228 | 229 | 230 | class PdfBlock(BaseNotionBlock): 231 | type: str = "pdf" 232 | pdf: PdfBlockAttributes 233 | 234 | 235 | class BookmarkBlock(BaseNotionBlock): 236 | type: str = "bookmark" 237 | bookmark: BookmarkBlockAttributes 238 | 239 | 240 | class EquationBlock(BaseNotionBlock): 241 | type: str = "equation" 242 | equation: EquationBlockAttributes 243 | 244 | 245 | class DividerBlock(BaseNotionBlock): 246 | type: str = "divider" 247 | divider: Optional[Dict] 248 | 249 | 250 | class TableOfContentsBlock(BaseNotionBlock): 251 | type: str = "table_of_contents" 252 | table_of_contents: TableOfContentsAttributes 253 | 254 | 255 | class BreadcrumbBlock(BaseNotionBlock): 256 | type: str = "breadcrumb" 257 | breadcrumb: Optional[Dict] 258 | 259 | 260 | # TODO: Column List and Column Blocks 261 | 262 | 263 | class LinkPreviewBlock(BaseNotionBlock): 264 | type: str = "link_preview" 265 | link_preview: LinkPreviewAttributes 266 | 267 | 268 | # TODO: Template blocks 269 | 270 | 271 | class LinkToPageBlock(BaseNotionBlock): 272 | type: str = "link_to_page" 273 | link_to_page: LinkToPageAttributes 274 | 275 | 276 | # TODO: Synced Block blocks 277 | 278 | # TODO: Table blocks 279 | 280 | # TODO: Table row blocks 281 | 282 | BLOCKS_MAPPING = { 283 | list(_cls.__fields__.keys())[-1]: _cls for _cls in BaseNotionBlock.__subclasses__() 284 | } 285 | 286 | 287 | def 
parse_one_block(data: Dict) -> BaseNotionBlock: 288 | if data["type"] not in BLOCKS_MAPPING: 289 | warnings.warn(f"Unknown block type: {data['type']}") 290 | return None 291 | 292 | return parse_obj_as(BLOCKS_MAPPING[data["type"]], data) 293 | 294 | 295 | def parse_blocks( 296 | data: List[Dict], recursive: bool = False, client: Client = None 297 | ) -> List[BaseNotionBlock]: 298 | all_blocks = [] 299 | for block_data in data: 300 | block = parse_one_block(block_data) 301 | if block.has_children and recursive and client: 302 | block.set_children( 303 | parse_blocks( 304 | client.blocks.children.list(block_id=block.id)["results"], 305 | recursive=recursive, 306 | client=client, 307 | ) 308 | ) 309 | all_blocks.append(block) 310 | return all_blocks 311 | -------------------------------------------------------------------------------- /src/notion_df/configs.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional, Callable, Tuple 2 | import warnings 3 | import itertools 4 | from dataclasses import dataclass 5 | 6 | from pydantic import BaseModel, validator, parse_obj_as 7 | from pandas.api.types import ( 8 | is_datetime64_any_dtype, 9 | is_numeric_dtype, 10 | is_object_dtype, 11 | is_bool_dtype, 12 | is_categorical_dtype, 13 | is_list_like, 14 | ) 15 | 16 | from notion_df.base import ( 17 | SelectOptions, 18 | NumberFormat, 19 | RollupProperty, 20 | FormulaProperty, 21 | RelationProperty, 22 | ) 23 | from notion_df.utils import ( 24 | flatten_dict, 25 | IDENTITY_TRANSFORM, 26 | REMOVE_EMPTY_STR_TRANSFORM, 27 | SECURE_STR_TRANSFORM, 28 | SECURE_BOOL_TRANSFORM, 29 | SECURE_TIME_TRANSFORM, 30 | LIST_TRANSFORM, 31 | ) 32 | 33 | 34 | class BasePropertyConfig(BaseModel): 35 | id: Optional[str] 36 | type: Optional[str] 37 | 38 | def query_dict(self): 39 | return flatten_dict(self.dict()) 40 | 41 | @validator("type", always=True) 42 | def automatically_set_type_value(cls, v): 43 | _type = 
list(cls.__fields__.keys())[-1] 44 | if v is None: 45 | return _type 46 | else: 47 | assert _type == v, f"{_type} != {v}" 48 | return _type 49 | 50 | 51 | class TitleConfig(BasePropertyConfig): 52 | title: Dict = {} 53 | 54 | # TODO: Make the validator automatically geneerated 55 | @validator("title") 56 | def title_is_empty_dict(cls, v): 57 | if v: 58 | raise ValueError("The title dict must be empty") 59 | return v 60 | 61 | 62 | class RichTextConfig(BasePropertyConfig): 63 | rich_text: Dict = {} 64 | 65 | @validator("rich_text") 66 | def title_is_empty_dict(cls, v): 67 | if v: 68 | raise ValueError("The rich_text dict must be empty") 69 | return v 70 | 71 | 72 | class NumberConfig(BasePropertyConfig): 73 | number: NumberFormat 74 | 75 | # TODO:Add enum based on https://developers.notion.com/reference/create-a-database#number-configuration 76 | 77 | 78 | class SelectConfig(BasePropertyConfig): 79 | select: Optional[SelectOptions] 80 | 81 | 82 | class MultiSelectConfig(BasePropertyConfig): 83 | multi_select: Optional[SelectOptions] 84 | 85 | 86 | class DateConfig(BasePropertyConfig): 87 | date: Dict = {} 88 | 89 | @validator("date") 90 | def title_is_empty_dict(cls, v): 91 | if v: 92 | raise ValueError("The date dict must be empty") 93 | return v 94 | 95 | 96 | class PeopleConfig(BasePropertyConfig): 97 | people: Dict = {} 98 | 99 | @validator("people") 100 | def title_is_empty_dict(cls, v): 101 | if v: 102 | raise ValueError("The people dict must be empty") 103 | return v 104 | 105 | 106 | class FilesConfig(BasePropertyConfig): 107 | files: Dict = {} 108 | 109 | @validator("files") 110 | def title_is_empty_dict(cls, v): 111 | if v: 112 | raise ValueError("The files dict must be empty") 113 | return v 114 | 115 | 116 | class CheckboxConfig(BasePropertyConfig): 117 | checkbox: Dict = {} 118 | 119 | @validator("checkbox") 120 | def title_is_empty_dict(cls, v): 121 | if v: 122 | raise ValueError("The checkbox dict must be empty") 123 | return v 124 | 125 | 126 | class 
URLConfig(BasePropertyConfig): 127 | url: Dict = {} 128 | 129 | @validator("url") 130 | def title_is_empty_dict(cls, v): 131 | if v: 132 | raise ValueError("The url dict must be empty") 133 | return v 134 | 135 | 136 | class EmailConfig(BasePropertyConfig): 137 | email: Dict = {} 138 | 139 | @validator("email") 140 | def title_is_empty_dict(cls, v): 141 | if v: 142 | raise ValueError("The email dict must be empty") 143 | return v 144 | 145 | 146 | class PhoneNumberConfig(BasePropertyConfig): 147 | phone_number: Dict = {} 148 | 149 | @validator("phone_number") 150 | def title_is_empty_dict(cls, v): 151 | if v: 152 | raise ValueError("The phone_number dict must be empty") 153 | return v 154 | 155 | 156 | class FormulaConfig(BasePropertyConfig): 157 | formula: FormulaProperty 158 | 159 | 160 | class RelationConfig(BasePropertyConfig): 161 | relation: RelationProperty 162 | 163 | 164 | class RollupConfig(BasePropertyConfig): 165 | rollup: RollupProperty 166 | 167 | 168 | class CreatedTimeConfig(BasePropertyConfig): 169 | created_time: Dict = {} 170 | 171 | @validator("created_time") 172 | def title_is_empty_dict(cls, v): 173 | if v: 174 | raise ValueError("The created_time dict must be empty") 175 | return v 176 | 177 | 178 | class CreatedByConfig(BasePropertyConfig): 179 | created_by: Dict = {} 180 | 181 | @validator("created_by") 182 | def title_is_empty_dict(cls, v): 183 | if v: 184 | raise ValueError("The created_by dict must be empty") 185 | return v 186 | 187 | 188 | class LastEditedTimeConfig(BasePropertyConfig): 189 | last_edited_time: Dict = {} 190 | 191 | @validator("last_edited_time") 192 | def title_is_empty_dict(cls, v): 193 | if v: 194 | raise ValueError("The last_edited_time dict must be empty") 195 | return v 196 | 197 | 198 | class LastEditedByConfig(BasePropertyConfig): 199 | last_edited_by: Dict = {} 200 | 201 | @validator("last_edited_by") 202 | def title_is_empty_dict(cls, v): 203 | if v: 204 | raise ValueError("The last_edited_by dict must be 
empty") 205 | return v 206 | 207 | 208 | def _convert_classname_to_typename(s): 209 | import re 210 | 211 | s = s.replace("Config", "").replace("URL", "Url") 212 | return re.sub(r"(? BasePropertyConfig: 232 | return parse_obj_as(CONFIGS_MAPPING[data["type"]], data) 233 | 234 | 235 | CONFIGS_DF_TRANSFORMER = { 236 | "title": SECURE_STR_TRANSFORM, 237 | "rich_text": SECURE_STR_TRANSFORM, 238 | "number": None, 239 | "select": REMOVE_EMPTY_STR_TRANSFORM, 240 | "multi_select": lambda lst: [str(ele) for ele in lst] 241 | if is_list_like(lst) 242 | else str(lst), 243 | "date": SECURE_TIME_TRANSFORM, 244 | "checkbox": SECURE_BOOL_TRANSFORM, 245 | ### Notion-specific Properties ### 246 | # Currently we don't automatically convert these properties 247 | # We assume the users will use the correct type and we don't need to perform any transformation 248 | "people": IDENTITY_TRANSFORM, 249 | "relation": LIST_TRANSFORM, 250 | "url": REMOVE_EMPTY_STR_TRANSFORM, 251 | "email": REMOVE_EMPTY_STR_TRANSFORM, 252 | ### TODO: check the following ### 253 | "files": SECURE_STR_TRANSFORM, 254 | "phone_number": SECURE_STR_TRANSFORM, 255 | "formula": SECURE_STR_TRANSFORM, 256 | "rollup": SECURE_STR_TRANSFORM, 257 | "created_time": SECURE_STR_TRANSFORM, 258 | "created_by": SECURE_STR_TRANSFORM, 259 | "last_edited_time": SECURE_STR_TRANSFORM, 260 | "last_edited_by": SECURE_STR_TRANSFORM, 261 | } 262 | 263 | 264 | def _infer_series_config(column: "pd.Series") -> BasePropertyConfig: 265 | dtype = column.dtype 266 | 267 | if is_object_dtype(dtype): 268 | if all(is_list_like(ele) for ele in column): 269 | all_possible_values = set( 270 | list(itertools.chain.from_iterable(column.to_list())) 271 | ) 272 | all_possible_values = [str(ele) for ele in all_possible_values] 273 | return MultiSelectConfig( 274 | multi_select=SelectOptions.from_value(all_possible_values), 275 | ) 276 | else: 277 | return RichTextConfig() 278 | if is_numeric_dtype(dtype): 279 | return 
NumberConfig(number=NumberFormat(format="number")) 280 | if is_bool_dtype(dtype): 281 | return CheckboxConfig() 282 | if is_categorical_dtype(dtype): 283 | return SelectConfig( 284 | select=SelectOptions.from_value([str for cat in dtype.categories]), 285 | ) 286 | if is_datetime64_any_dtype(dtype): 287 | return DateConfig() 288 | 289 | return None 290 | 291 | 292 | @dataclass 293 | class DatabaseSchema: 294 | 295 | configs: Dict[str, BasePropertyConfig] 296 | 297 | @classmethod 298 | def from_raw(cls, configs: Dict) -> "DatabaseSchema": 299 | 300 | configs = {key: parse_single_config(config) for key, config in configs.items()} 301 | return cls(configs) 302 | 303 | def __getitem__(self, key: int): 304 | return self.configs[key] 305 | 306 | def query_dict(self) -> Dict: 307 | return {key: config.query_dict() for key, config in self.configs.items()} 308 | 309 | @classmethod 310 | def from_df( 311 | cls, df: "pd.DataFrame", title_col: Optional[str] = None 312 | ) -> "DatabaseSchema": 313 | """Automatically infer the schema from a pandas dataframe""" 314 | df = df.infer_objects() 315 | 316 | configs = {} 317 | for col in df.columns: 318 | config = _infer_series_config(df[col]) 319 | configs[col] = config 320 | 321 | if title_col is not None: 322 | configs[title_col] = TitleConfig() 323 | else: 324 | configs[df.columns[0]] = TitleConfig() 325 | 326 | return cls(configs) 327 | 328 | @property 329 | def title_column(self) -> Optional[str]: 330 | for key, config in self.configs.items(): 331 | if isinstance(config, TitleConfig) or config.type == "title": 332 | # TODO: Rethink this 333 | return key 334 | 335 | def create_df(self, df) -> "pd.DataFrame": 336 | 337 | notion_urls = df.notion_urls 338 | notion_ids = df.notion_ids 339 | notion_query_results = df.notion_query_results 340 | 341 | df = df.copy() 342 | # Ensure the column integrity 343 | # See the issue mentioned in https://github.com/lolipopshock/notion-df/issues/17 344 | columns = [col for col in df.columns if col in 
self.configs] 345 | df = df[columns] 346 | 347 | df.schema = self 348 | 349 | with warnings.catch_warnings(): 350 | warnings.simplefilter("ignore") 351 | df.notion_urls = notion_urls 352 | df.notion_ids = notion_ids 353 | df.notion_query_results = notion_query_results 354 | 355 | return df 356 | 357 | def is_df_compatible(self, df: "pd.DataFrame") -> bool: 358 | """Validate the dataframe against the schema""" 359 | 360 | if hasattr(df, "schema"): 361 | if not df.schema == self: 362 | return False 363 | 364 | # TODO: There might miss one thing: if the rollup is not configured 365 | # the database reterive result will be empty for that column. 366 | # But the database query will return the value for that column 367 | # (even if that's empty). So this would miss this check... 368 | else: 369 | for col in df.columns: 370 | if col not in self.configs.keys(): 371 | return False 372 | 373 | # TODO: Add more advanced check on datatypes 374 | return True 375 | 376 | def transform( 377 | self, df: "pd.DataFrame", remove_non_editables=False 378 | ) -> "pd.DataFrame": 379 | """Transform the df such that the data values are compatible with the schema. 380 | It assumes the df has already been validated against the schema. 
381 | """ 382 | df = df.copy() 383 | used_columns = [] 384 | for col in df.columns: 385 | if self[col].type in NON_EDITABLE_TYPES: 386 | continue # Skip non-editable columns 387 | 388 | transform = CONFIGS_DF_TRANSFORMER[self[col].type] 389 | if transform is not None: 390 | df[col] = df[col].apply(transform) 391 | used_columns.append(col) 392 | if remove_non_editables: 393 | return df[used_columns] 394 | return df 395 | -------------------------------------------------------------------------------- /src/notion_df/constants.py: -------------------------------------------------------------------------------- 1 | # See https://developers.notion.com/reference/request-limits 2 | 3 | RICH_TEXT_CONTENT_MAX_LENGTH = 2000 4 | RICH_TEXT_LINK_MAX_LENGTH = 1000 5 | EQUATION_EXPRESSION_MAX_LENGTH = 1000 -------------------------------------------------------------------------------- /src/notion_df/utils.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional, Union, Any 2 | from datetime import datetime 3 | from dateutil.parser import parse 4 | from uuid import UUID 5 | 6 | import pandas as pd 7 | from pandas.api.types import is_array_like, is_datetime64_any_dtype, is_list_like 8 | 9 | 10 | def flatten_dict(data: Dict): 11 | """Remove entries in dict whose values are None""" 12 | if isinstance(data, dict): 13 | return { 14 | key: flatten_dict(value) for key, value in data.items() if value is not None 15 | } 16 | elif isinstance(data, list) or isinstance(data, tuple): 17 | return [flatten_dict(value) for value in data] 18 | else: 19 | return data 20 | 21 | 22 | def is_item_empty(item: Any) -> bool: 23 | 24 | if item is None or item == []: 25 | return True 26 | 27 | isna = pd.isna(item) 28 | if is_array_like(isna): 29 | isna = isna.all() 30 | # TODO: Rethink it is all or any 31 | 32 | return isna 33 | 34 | 35 | def is_time_string(s: str) -> bool: 36 | 37 | # Ref 
https://stackoverflow.com/questions/25341945/check-if-string-has-date-any-format 38 | try: 39 | parse(s) 40 | return True 41 | except ValueError: 42 | return False 43 | 44 | 45 | def is_uuid(s: str) -> bool: 46 | # Kind of an OK solution.. But can be further improved? 47 | try: 48 | UUID(str(s)) 49 | return True 50 | except ValueError: 51 | return False 52 | 53 | 54 | ISO8601_REGEX = r"^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?$" 55 | # See https://stackoverflow.com/questions/41129921/validate-an-iso-8601-datetime-string-in-python 56 | ISO8601_STRFTIME_TRANSFORM = lambda ele: ele.strftime("%Y-%m-%dT%H:%M:%SZ") 57 | 58 | strtime_transform = lambda ele: parse(ele).strftime("%Y-%m-%dT%H:%M:%SZ") 59 | datetime_transform = lambda ele: ele.strftime("%Y-%m-%dT%H:%M:%SZ") 60 | 61 | 62 | def transform_time(s: Any) -> str: 63 | if not is_item_empty(s): 64 | if isinstance(s, str): 65 | return strtime_transform(s) 66 | elif isinstance(s, datetime): 67 | return datetime_transform(s) 68 | elif is_datetime64_any_dtype(s): 69 | return datetime_transform(s) 70 | 71 | 72 | IDENTITY_TRANSFORM = lambda ele: ele 73 | SECURE_STR_TRANSFORM = lambda ele: str(ele) if not is_item_empty(ele) else "" 74 | LIST_TRANSFORM = lambda ele: ele if is_list_like(ele) else [ele] 75 | REMOVE_EMPTY_STR_TRANSFORM = ( 76 | lambda ele: None if ele == "" or ele is None or pd.isna(ele) else SECURE_STR_TRANSFORM(ele) 77 | ) 78 | SECURE_BOOL_TRANSFORM = lambda ele: bool(ele) if not is_item_empty(ele) else None 79 | SECURE_TIME_TRANSFORM = transform_time 80 | -------------------------------------------------------------------------------- /src/notion_df/values.py: -------------------------------------------------------------------------------- 1 | ### Referring to https://developers.notion.com/reference/page#property-value-object 2 | 3 | from typing import List, Dict, Optional, Union, Any 4 
from dataclasses import dataclass
from copy import deepcopy
import numbers

from pydantic import BaseModel, parse_obj_as, validator, root_validator
import pandas as pd
from pandas.api.types import is_array_like

from notion_df.base import (
    RichTextObject,
    SelectOption,
    DateObject,
    RelationObject,
    UserObject,
    RollupObject,
    FileObject,
    FormulaObject
)
from notion_df.utils import (
    flatten_dict,
    is_list_like
)


class BasePropertyValues(BaseModel):
    """Base class for property-value objects; payload lives in the subclass field."""

    id: Optional[str]  # TODO: Rethink whether we can do this
    # The Optional[id] is used when creating property values
    type: Optional[str]

    # TODO: Add abstractmethods for them
    @classmethod
    def from_value(cls, value):
        """Construct the property value from a plain Python value."""
        pass

    @property
    def value(self):
        """The plain Python value stored in this property."""
        pass

    def query_dict(self):
        # Drop None entries so the payload is accepted by the Notion API.
        return flatten_dict(self.dict())


class TitleValues(BasePropertyValues):
    title: List[RichTextObject]

    @property
    def value(self) -> Optional[str]:
        # Join multi-chunk rich text with spaces; None when empty.
        return (
            None
            if len(self.title) == 0
            else " ".join([text.value for text in self.title])
        )

    @classmethod
    def from_value(cls, value):
        return cls(title=RichTextObject.encode_string(value))
        # TODO: Rethink whether we should split input string to multiple elements in the list


class RichTextValues(BasePropertyValues):
    rich_text: List[RichTextObject]

    @property
    def value(self) -> Optional[str]:
        return (
            None
            if len(self.rich_text) == 0
            else " ".join([text.value for text in self.rich_text])
        )

    @classmethod
    def from_value(cls, value: str):
        return cls(rich_text=RichTextObject.encode_string(value))


class NumberValues(BasePropertyValues):
    number: Optional[Union[float, int]]

    @property
    def value(self) -> Optional[Union[float, int]]:
        # NOTE: annotation fixed — this returns the numeric payload, not str.
        return self.number

    @classmethod
    def from_value(cls, value: Union[float, int]):
        return cls(number=value)


class SelectValues(BasePropertyValues):
    select: Optional[SelectOption]

    @property
    def value(self) -> Optional[str]:
        return self.select.name if self.select else None

    @classmethod
    def from_value(cls, value: str):
        return cls(select=SelectOption.from_value(value))


class MultiSelectValues(BasePropertyValues):
    multi_select: List[SelectOption]

    @property
    def value(self) -> List[str]:
        return [select.name for select in self.multi_select]

    @classmethod
    def from_value(cls, values: Union[List[str], str]):
        # A scalar is wrapped into a single-option list.
        if is_list_like(values):
            return cls(
                multi_select=[SelectOption.from_value(value) for value in values]
            )
        else:
            return cls(multi_select=[SelectOption.from_value(values)])


class DateValues(BasePropertyValues):
    date: Optional[DateObject]

    @property
    def value(self) -> str:
        # NOTE(review): actually returns DateObject.value (a pandas timestamp)
        # or None, not str — confirm and fix the annotation upstream.
        return self.date.value if self.date else None

    @classmethod
    def from_value(cls, value: str):
        return cls(date=DateObject.from_value(value))


class FormulaValues(BasePropertyValues):
    formula: FormulaObject

    @property
    def value(self):
        return self.formula.value


class RelationValues(BasePropertyValues):
    relation: List[RelationObject]

    @property
    def value(self) -> List[str]:
        return [relation.id for relation in self.relation]

    @classmethod
    def from_value(cls, values: Union[List[str], str]):
        if is_list_like(values):
            return cls(relation=[RelationObject.from_value(value) for value in values])
        else:
            return cls(relation=[RelationObject.from_value(values)])


class PeopleValues(BasePropertyValues):
    people: List[UserObject]

    @property
    def value(self) -> List[str]:
        return [people.id for people in self.people]

    @classmethod
    def from_value(cls, values: Union[List[str], str]):
        if is_list_like(values):
            return cls(people=[UserObject.from_value(value) for value in values])
        else:
            return cls(people=[UserObject.from_value(values)])


class FilesValues(BasePropertyValues):
    files: List[FileObject]

    @property
    def value(self) -> List[str]:
        return [file.value for file in self.files]


class CheckboxValues(BasePropertyValues):
    checkbox: Optional[bool]

    @property
    def value(self) -> Optional[bool]:
        return self.checkbox

    @classmethod
    def from_value(cls, value: bool):
        return cls(checkbox=value)


class URLValues(BasePropertyValues):
    url: Optional[str]

    @property
    def value(self) -> Optional[str]:
        return self.url

    @classmethod
    def from_value(cls, value: Optional[str]):
        return cls(url=value)

    def query_dict(self):
        res = flatten_dict(self.dict())
        if "url" not in res:
            res["url"] = None
            # The url value is required by the notion API
        return res


class EmailValues(BasePropertyValues):
    email: Optional[str]

    @property
    def value(self) -> Optional[str]:
        return self.email

    @classmethod
    def from_value(cls, value: str):
        return cls(email=value)


class PhoneNumberValues(BasePropertyValues):
    phone_number: Optional[str]

    @property
    def value(self) -> Optional[str]:
        return self.phone_number

    @classmethod
    def from_value(cls, value: str):
        return cls(phone_number=value)


class CreatedTimeValues(BasePropertyValues):
    created_time: Optional[str]

    @property
    def value(self) -> Optional[str]:
        return
self.created_time 238 | 239 | @classmethod 240 | def from_value(cls, value: str): 241 | return cls(created_time=value) 242 | 243 | 244 | class CreatedByValues(BasePropertyValues): 245 | created_by: UserObject 246 | 247 | @property 248 | def value(self) -> List[str]: 249 | return self.created_by.value 250 | 251 | 252 | class LastEditedTimeValues(BasePropertyValues): 253 | last_edited_time: str 254 | 255 | @property 256 | def value(self) -> Optional[str]: 257 | return self.last_edited_time 258 | 259 | @classmethod 260 | def from_value(cls, value: str): 261 | return cls(last_edited_time=value) 262 | 263 | 264 | class LastEditedByValues(BasePropertyValues): 265 | last_edited_by: UserObject 266 | 267 | @property 268 | def value(self) -> List[str]: 269 | return self.last_edited_by.value 270 | 271 | 272 | VALUES_MAPPING = { 273 | list(_cls.__fields__.keys())[-1]: _cls 274 | for _cls in BasePropertyValues.__subclasses__() 275 | if len(_cls.__fields__) 276 | == 3 # TODO: When all classes have been implemented, we can just remove this check 277 | } 278 | 279 | 280 | class RollupValues(BasePropertyValues): 281 | rollup: RollupObject 282 | 283 | @validator("rollup", pre=True) 284 | def check_rollup_values(cls, val): 285 | val = deepcopy(val) 286 | if val.get("array") is not None: 287 | val["array"] = [ 288 | parse_obj_as(VALUES_MAPPING[data["type"]], data) 289 | for data in val["array"] 290 | ] 291 | return val 292 | 293 | @property 294 | def value(self): 295 | return self.rollup.value 296 | 297 | 298 | VALUES_MAPPING["rollup"] = RollupValues 299 | 300 | 301 | def parse_single_values(data: Dict) -> BasePropertyValues: 302 | return parse_obj_as(VALUES_MAPPING[data["type"]], data) 303 | 304 | 305 | def _guess_value_schema(val: Any) -> object: 306 | 307 | if isinstance(val, str): 308 | return RichTextValues 309 | elif isinstance(val, numbers.Number): 310 | return NumberValues 311 | elif isinstance(val, bool): 312 | return CheckboxValues 313 | else: 314 | raise 
ValueError(f"Unknown value type: {type(val)}") 315 | 316 | 317 | def _is_item_empty(item): 318 | 319 | if item is None or item == []: 320 | return True 321 | 322 | isna = pd.isna(item) 323 | if is_array_like(isna): 324 | isna = isna.all() 325 | # TODO: Rethink it is all or any 326 | 327 | return isna 328 | 329 | 330 | RESERVED_VALUES = ["url"] 331 | # Even if the value is none, we still want to keep it in the dataframe 332 | 333 | 334 | def _is_reserved_value(key, schema): 335 | return schema[key].type in RESERVED_VALUES 336 | 337 | 338 | def parse_value_with_schema( 339 | idx: int, key: str, value: Any, schema: "DatabaseSchema" 340 | ) -> BasePropertyValues: 341 | # TODO: schema shouldn't be allowed to be empty in the future version 342 | # schema should be determined at the dataframe level. 343 | 344 | if schema is not None: 345 | value_func = VALUES_MAPPING[schema[key].type] 346 | else: 347 | if idx == 0: 348 | # TODO: Brutally enforce the first one to be the title, though 349 | # should be optimized in future versions 350 | value_func = TitleValues 351 | value = str(value) 352 | else: 353 | value_func = _guess_value_schema(value) 354 | 355 | return value_func.from_value(value) 356 | 357 | 358 | @dataclass 359 | class PageProperty: 360 | """This class is used to parse properties of a single Notion Page. 
361 | 362 | :: example: 363 | 364 | >>> data = \ 365 | {"Description": {"id": "ji%3Dc", "type": "rich_text", "rich_text": []}, 366 | "Created": {"id": "mbOA", "type": "date", "date": None}, 367 | "Title": {"id": "title", "type": "title", "title": []}} 368 | >>> property = PageProperty.from_raw(data) 369 | """ 370 | 371 | properties: Dict[str, BasePropertyValues] 372 | 373 | @classmethod 374 | def from_raw(cls, properties: Dict) -> "PageProperty": 375 | properties = {k: parse_single_values(v) for k, v in properties.items()} 376 | return cls(properties) 377 | 378 | def __getitem__(self, key): 379 | return self.properties[key] 380 | 381 | def to_series(self): 382 | return pd.Series( 383 | {key: property.value for key, property in self.properties.items()} 384 | ) 385 | 386 | @classmethod 387 | def from_series( 388 | cls, series: pd.Series, schema: "DatabaseSchema" = None 389 | ) -> "PageProperty": 390 | return cls( 391 | { 392 | key: parse_value_with_schema(idx, key, val, schema) 393 | for idx, (key, val) in enumerate(series.items()) 394 | if not _is_item_empty(val) or _is_reserved_value(key, schema) 395 | } 396 | ) 397 | 398 | def query_dict(self) -> Dict: 399 | return {key: property.query_dict() for key, property in self.properties.items()} 400 | 401 | 402 | @dataclass 403 | class PageProperties: 404 | """This class is used to parse multiple page properties within a database 405 | 406 | :: example: 407 | 408 | >>> data = \ 409 | [ 410 | { 411 | "object": "page", 412 | "id": "xxxx", 413 | "created_time": "2032-01-03T00:00:00.000Z", 414 | "properties": { 415 | "Description": {"id": "ji%3Dc", "type": "rich_text", "rich_text": []}, 416 | "Created": {"id": "mbOA", "type": "date", "date": None}, 417 | "Title": {"id": "title", "type": "title", "title": []} 418 | } 419 | }, 420 | { 421 | "object": "page", 422 | "id": "xxxx", 423 | "created_time": "2032-01-03T00:00:01.000Z", 424 | "properties": { 425 | "Description": {"id": "ji%3Dc", "type": "rich_text", "rich_text": []}, 426 
| "Created": {"id": "mbOA", "type": "date", "date": None}, 427 | "Title": {"id": "title", "type": "title", "title": []} 428 | } 429 | } 430 | ] 431 | >>> property = PageProperties.from_raw(data) 432 | """ 433 | 434 | page_properties: List[PageProperty] 435 | 436 | @classmethod 437 | def from_raw(cls, properties: List[Dict]) -> "PageProperties": 438 | page_properties = [ 439 | PageProperty.from_raw(property["properties"]) for property in properties 440 | ] 441 | return cls(page_properties) 442 | 443 | def __getitem__(self, key: int): 444 | return self.page_properties[key] 445 | 446 | def to_frame(self): 447 | return pd.DataFrame([property.to_series() for property in self.page_properties]) 448 | -------------------------------------------------------------------------------- /tests/test_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from notion_df.agent import download 4 | 5 | NOTION_API_KEY = os.environ.get("NOTION_API_KEY") 6 | NOTION_LARGE_DF = os.environ.get("NOTION_LARGE_DF") 7 | NOTION_LARGE_DF_ROWS = 150 8 | 9 | def test_nrows(): 10 | if not NOTION_LARGE_DF or not NOTION_API_KEY: 11 | pytest.skip("API key not provided") 12 | 13 | df = download(NOTION_LARGE_DF, api_key=NOTION_API_KEY) 14 | assert len(df) == NOTION_LARGE_DF_ROWS 15 | 16 | df = download(NOTION_LARGE_DF, nrows=101, api_key=NOTION_API_KEY) 17 | assert len(df) == 101 18 | 19 | df = download(NOTION_LARGE_DF, nrows=15, api_key=NOTION_API_KEY) 20 | assert len(df) == 15 -------------------------------------------------------------------------------- /tests/test_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import pytest 4 | import notion_df 5 | import pandas as pd 6 | from pydantic import ValidationError 7 | from notion_df.agent import download, upload 8 | 9 | NOTION_API_KEY = os.environ.get("NOTION_API_KEY") 10 | 11 | 12 | def 
test_select_option(): 13 | schema = notion_df.configs.DatabaseSchema( 14 | {"options": notion_df.configs.MultiSelectConfig()} 15 | ) 16 | 17 | df = pd.DataFrame([{"options": [1, 2, 3]}]) 18 | dff = schema.transform(df) 19 | notion_df.values.PageProperty.from_series(dff.iloc[0], schema) 20 | 21 | # Not working because of commas in the option string 22 | df = pd.DataFrame([{"options": ["a,b", "c,d"]}]) 23 | dff = schema.transform(df) 24 | with pytest.raises(ValidationError): 25 | notion_df.values.PageProperty.from_series(dff.iloc[0], schema) 26 | 27 | # The following also checks whether it can convert elements into strings 28 | df = pd.DataFrame([{"options": [[1, 2, 3], [4, 5, 6]]}]) 29 | dff = schema.transform(df) 30 | with pytest.raises(ValidationError): 31 | notion_df.values.PageProperty.from_series(dff.iloc[0], schema) 32 | 33 | 34 | def test_rollup(): 35 | NOTION_ROLLUP_DF = os.environ.get("NOTION_ROLLUP_DF") 36 | 37 | if not NOTION_ROLLUP_DF or not NOTION_API_KEY: 38 | pytest.skip("API key not provided") 39 | 40 | # Ensure the rollup values can be downloaded and uploaded 41 | df = download(NOTION_ROLLUP_DF, api_key=NOTION_API_KEY) 42 | upload(df[:2], NOTION_ROLLUP_DF, api_key=NOTION_API_KEY) 43 | # TODO: Add remove rollup values 44 | 45 | 46 | def test_files_edit_by(): 47 | NOTION_FILES_DF = os.environ.get("NOTION_FILES_DF") 48 | 49 | if not NOTION_FILES_DF or not NOTION_API_KEY: 50 | pytest.skip("API key not provided") 51 | 52 | df = download(NOTION_FILES_DF, api_key=NOTION_API_KEY) 53 | 54 | 55 | def test_formula(): 56 | NOTION_FORMULA_DF = os.environ.get("NOTION_FORMULA_DF") 57 | 58 | if not NOTION_FORMULA_DF or not NOTION_API_KEY: 59 | pytest.skip("API key not provided") 60 | 61 | df = download(NOTION_FORMULA_DF, api_key=NOTION_API_KEY) 62 | 63 | 64 | def test_relation(): 65 | NOTION_RELATION_DF = os.environ.get("NOTION_RELATION_DF") 66 | NOTION_RELATION_TARGET_DF = os.environ.get("NOTION_RELATION_TARGET_DF") 67 | 68 | if not NOTION_RELATION_DF or not 
NOTION_RELATION_TARGET_DF or not NOTION_API_KEY: 69 | pytest.skip("API key not provided") 70 | 71 | # download: resolve 72 | # upload: resolve 73 | df = download( 74 | NOTION_RELATION_DF, api_key=NOTION_API_KEY, resolve_relation_values=True 75 | ) 76 | df_target = download(NOTION_RELATION_TARGET_DF, api_key=NOTION_API_KEY) 77 | 78 | assert "private_page" not in df.columns 79 | # See https://github.com/lolipopshock/notion-df/issues/17 80 | 81 | ## witout a new key 82 | upload( 83 | df[:1], 84 | NOTION_RELATION_DF, 85 | resolve_relation_values=True, 86 | create_new_rows_in_relation_target=True, 87 | ) 88 | df_target_new = download(NOTION_RELATION_TARGET_DF, api_key=NOTION_API_KEY) 89 | assert len(df_target_new) == len(df_target) 90 | 91 | ## with a new key 92 | rint = random.randint(0, 100000) 93 | df.at[0, "Related to Tasks"] = [f"test {rint}"] 94 | upload( 95 | df[:1], 96 | NOTION_RELATION_DF, 97 | resolve_relation_values=True, 98 | create_new_rows_in_relation_target=True, 99 | ) 100 | df_target_new = download(NOTION_RELATION_TARGET_DF, api_key=NOTION_API_KEY) 101 | assert len(df_target_new) == len(df_target) + 1 102 | df_target_new.iloc[-1]["name"] == f"test {rint}" 103 | 104 | # download: not-resolve 105 | # upload: resolve 106 | # Avoids creating new rows for uuid only lists 107 | df = download( 108 | NOTION_RELATION_DF, api_key=NOTION_API_KEY, resolve_relation_values=False 109 | ) 110 | df_target = download(NOTION_RELATION_TARGET_DF, api_key=NOTION_API_KEY) 111 | 112 | upload( 113 | df[:1], 114 | NOTION_RELATION_DF, 115 | resolve_relation_values=True, 116 | create_new_rows_in_relation_target=True, 117 | ) 118 | df_target_new = download(NOTION_RELATION_TARGET_DF, api_key=NOTION_API_KEY) 119 | assert len(df_target_new) == len(df_target) 120 | 121 | # download: resolve 122 | # upload: not-resolve 123 | # Raises error 124 | df = download( 125 | NOTION_RELATION_DF, api_key=NOTION_API_KEY, resolve_relation_values=True 126 | ) 127 | 128 | with 
pytest.raises(ValidationError): 129 | upload( 130 | df[:1], 131 | NOTION_RELATION_DF, 132 | resolve_relation_values=False, 133 | ) 134 | 135 | def test_long_string(): 136 | NOTION_LONG_STRING_DF = os.environ.get("NOTION_LONG_STRING_DF") 137 | 138 | if not NOTION_LONG_STRING_DF or not NOTION_API_KEY: 139 | pytest.skip("API key not provided") 140 | 141 | df = download(NOTION_LONG_STRING_DF, api_key=NOTION_API_KEY) 142 | assert len(df.iloc[0,1]) == 7721 143 | 144 | upload(df[:1], NOTION_LONG_STRING_DF, api_key=NOTION_API_KEY) 145 | df_new = download(NOTION_LONG_STRING_DF, api_key=NOTION_API_KEY) 146 | # assert len(df_new.iloc[0,1]) == 7721 147 | # This might not be true -- understand why? 148 | 149 | def test_rich_text(): 150 | NOTION_RICH_TEXT_DF = os.environ.get("NOTION_RICH_TEXT_DF") 151 | 152 | if not NOTION_RICH_TEXT_DF or not NOTION_API_KEY: 153 | pytest.skip("API key not provided") 154 | 155 | df = download(NOTION_RICH_TEXT_DF, api_key=NOTION_API_KEY) --------------------------------------------------------------------------------