├── flow_watcher ├── __init__.py ├── __pycache__ │ ├── drive.cpython-310.pyc │ ├── utils.cpython-310.pyc │ └── __init__.cpython-310.pyc ├── utils.py ├── oai.py ├── drive.py └── notion.py ├── requirements.txt ├── sandbox ├── tts.py ├── notion_read.py ├── list_and_download.py └── notion.ts ├── docs ├── GOOGLE_DRIVE_SETUP.md └── NOTION_SETUP.md ├── setup.py ├── prompts ├── typing.md └── comments.md ├── README.md └── LICENSE /flow_watcher/__init__.py: -------------------------------------------------------------------------------- 1 | from . import drive 2 | from . import utils 3 | from . import oai 4 | from . import notion -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client==2.70.0 2 | google-auth-httplib2==0.1.0 3 | google-auth-oauthlib==0.4.6 4 | openai 5 | -------------------------------------------------------------------------------- /flow_watcher/__pycache__/drive.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/flow-watcher/main/flow_watcher/__pycache__/drive.cpython-310.pyc -------------------------------------------------------------------------------- /flow_watcher/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/flow-watcher/main/flow_watcher/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /flow_watcher/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/flow-watcher/main/flow_watcher/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /flow_watcher/utils.py: -------------------------------------------------------------------------------- 1 | def print_progress_bar(progress): 2 | bar_length = 20 3 | filled_length = int(bar_length * progress // 100) 4 | bar = '█' * filled_length + '-' * (bar_length - filled_length) 5 | print(f'\r[{bar}] {progress}%', end='') -------------------------------------------------------------------------------- /sandbox/tts.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import os 3 | import yaml 4 | 5 | from flow_watcher import oai 6 | 7 | # Load configuration from config.yaml 8 | with open('config.yaml', 'r') as config_file: 9 | config = yaml.safe_load(config_file) 10 | 11 | # Main function 12 | def main(): 13 | model = oai.OAI(api_key=config['oai_key']) 14 | audio_file_path = 'downloads/Recording 162643-053024.mp3' 15 | transcribed_text = model.transcribe_audio(audio_file_path) 16 | print("\nTranscribed Text:", transcribed_text) 17 | 18 | if __name__ == "__main__": 19 | main() -------------------------------------------------------------------------------- /docs/GOOGLE_DRIVE_SETUP.md: -------------------------------------------------------------------------------- 1 | # Google Drive Setup 2 | 3 | ## Create a Google Cloud Project 4 | 5 | 1. Navigate to the [Google Cloud Console](https://console.cloud.google.com/). 6 | 2. Click on the project dropdown and select **New Project**. 7 | 3. Enter a project name and click **Create**. 8 | 9 | ## Enable Google Drive API 10 | 11 | 1. 
In the [APIs & Services Dashboard](https://console.cloud.google.com/apis/dashboard), click **Enable APIs and Services**. 12 | 2. Search for "Google Drive API" and select it. 13 | 3. Click **Enable**. 14 | 15 | ## Create OAuth Credentials 16 | 17 | 1. Go to **APIs & Services** > **Credentials**. 18 | 2. Click **Create Credentials** > **OAuth client ID**. 19 | 3. Select **Desktop app** and provide a name. 20 | 4. Click **Create** and download the `credentials.json` file. 21 | 5. Create a `auth` folder in the root of the project. 22 | 6. Place the `credentials.json` file in the `auth` folder. 23 | 7. Create a `auth.yaml` file in the `auth` folder. 24 | 8. Add the following content to the `auth.yaml` file: 25 | 26 | ```yaml 27 | drive: FILE_NAME 28 | ``` -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | # Read the contents of your README file 4 | from pathlib import Path 5 | this_directory = Path(__file__).parent 6 | long_description = (this_directory / "README.md").read_text() 7 | 8 | setup( 9 | name="flow_watcher", 10 | version="0.1.0", 11 | author="Your Name", 12 | author_email="your.email@example.com", 13 | description="A Python-based application that monitors a specific folder in Google Drive for real-time changes.", 14 | long_description=long_description, 15 | long_description_content_type="text/markdown", 16 | url="https://github.com/thibo73800/flow-watcher", 17 | packages=find_packages(), 18 | install_requires=[ 19 | "google-api-python-client==2.70.0", 20 | "google-auth-httplib2==0.1.0", 21 | "google-auth-oauthlib==0.4.6", 22 | "openai", 23 | "notion2markdown==0.2.0", 24 | "gtts" 25 | ], 26 | classifiers=[ 27 | "Programming Language :: Python :: 3", 28 | "License :: OSI Approved :: MIT License", 29 | "Operating System :: OS Independent", 30 | ], 31 | python_requires='>=3.7', 32 | ) -------------------------------------------------------------------------------- /sandbox/notion_read.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from flow_watcher.notion import NotionAPI 4 | 5 | def read_notion_page(): 6 | # Load configuration from YAML files 7 | with open('auth/auth.yaml', 'r') as auth_file: 8 | auth_config = yaml.safe_load(auth_file) 9 | 10 | with open('config.yaml', 'r') as config_file: 11 | config = yaml.safe_load(config_file) 12 | 13 | # Get Notion API key and database ID from the configuration 14 | notion_api_key = auth_config['notion_api_key'] 15 | notion_database_id = config['notion_database_id'] 16 | 17 | # Initialize NotionAPI 18 | notion_api = NotionAPI(notion_api_key, notion_database_id) 19 | 20 | # Specify the page ID you want to read 21 | page_id = config['notion_page_id'] 22 | 23 | # Read the page as markdown 24 | markdown_content = notion_api.read_page_markdown(page_id) 25 | 26 | print(markdown_content) 27 | 28 | return 29 | 30 | # Create a directory to store the markdown file 31 | os.makedirs('notion_exports', exist_ok=True) 32 | 33 | # Save the markdown content to a file 34 | output_file = os.path.join('notion_exports', f'{page_id}.md') 35 | with open(output_file, 'w', encoding='utf-8') as f: 36 | f.write(markdown_content) 37 | 38 | print(f"Page content has been exported to {output_file}") 39 | 40 | if __name__ == '__main__': 41 | read_notion_page() 
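Note: the key names that `notion_read.py` expects in its two YAML files are taken directly from the script above; the values below are placeholders to fill in, not values shipped with the project:

```yaml
# auth/auth.yaml
notion_api_key: YOUR_NOTION_INTEGRATION_TOKEN

# config.yaml
notion_database_id: YOUR_DATABASE_ID
notion_page_id: YOUR_PAGE_ID
```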
-------------------------------------------------------------------------------- /sandbox/list_and_download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from flow_watcher.drive import GoogleDriveHandler 4 | import io 5 | 6 | 7 | def list_and_downoad(): 8 | # Load configuration from YAML files 9 | with open('auth/auth.yaml', 'r') as auth_file: 10 | auth_config = yaml.safe_load(auth_file) 11 | 12 | with open('config.yaml', 'r') as config_file: 13 | config = yaml.safe_load(config_file) 14 | 15 | # Get credentials and folder ID from the configuration 16 | credentials_path = os.path.join('auth', auth_config['drive']) 17 | folder_id = config['drive_folder'] 18 | 19 | drive_handler = GoogleDriveHandler(credentials_path) 20 | files = drive_handler.list_files_in_folder(folder_id) 21 | 22 | if files: 23 | first_file = files[0] 24 | output_path = os.path.join('downloads', first_file['name']) 25 | os.makedirs('downloads', exist_ok=True) 26 | 27 | # Check if the file already exists before downloading 28 | if os.path.exists(output_path): 29 | print(f"File '{first_file['name']}' already exists. Skipping download.") 30 | else: 31 | print(f"Downloading: {first_file['name']}") 32 | drive_handler.download_file(first_file['id'], output_path) 33 | else: 34 | print("No files found in the specified folder.") 35 | 36 | if __name__ == '__main__': 37 | list_and_downoad() 38 | 39 | 40 | -------------------------------------------------------------------------------- /prompts/typing.md: -------------------------------------------------------------------------------- 1 | You are tasked with ensuring that a Python script is properly using typing to enhance code clarity and maintainability. The script will be provided to you, and you should analyze it and add appropriate type hints. 2 | 3 | ## Steps to Add Type Hints 4 | 5 | ### 1. Analyze the Code 6 | - Understand the structure and functionality of the code. 7 | - Identify the data types used in variables, function parameters, and return values. 8 | 9 | ### 2. Identify Key Components 10 | - Functions and methods 11 | - Variables and constants 12 | - Class attributes 13 | - Complex data structures (e.g., lists, dictionaries, tuples) 14 | 15 | ### 3. Add Type Hints 16 | - Add type hints to function parameters and return values. 17 | - Use type hints for variables and class attributes where applicable. 18 | - Utilize `typing` module for complex types (e.g., `List`, `Dict`, `Tuple`, `Optional`). 19 | 20 | ## Guidelines for Adding Type Hints 21 | 22 | ### Use Clear and Concise Type Annotations 23 | - Ensure type hints are accurate and reflect the actual data types used. 24 | - Use `Optional` for parameters that can be `None`. 25 | - Use `Union` for parameters that can be of multiple types. 26 | - Use `Any` sparingly, only when the type cannot be determined. 27 | 28 | ### Type Hint Examples 29 | - **Basic Types**: `int`, `str`, `float`, `bool` 30 | - **Complex Types**: `List[int]`, `Dict[str, Any]`, `Tuple[int, str]` 31 | - **Optional Types**: `Optional[int]` 32 | - **Union Types**: `Union[int, str]` 33 | 34 | -------------------------------------------------------------------------------- /flow_watcher/oai.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from typing import List 3 | 4 | class OAI: 5 | """ 6 | A class to interact with the OpenAI API for various functionalities such as listing models and transcribing audio. 
7 | """ 8 | 9 | def __init__(self, api_key: str) -> None: 10 | """ 11 | Initialize the OAI class with an API key. 12 | 13 | Parameters 14 | ---------- 15 | api_key : str 16 | The API key to authenticate with the OpenAI API. 17 | """ 18 | self.client = openai.OpenAI(api_key=api_key) 19 | 20 | def list_models(self) -> None: 21 | """ 22 | List all available models from the OpenAI API and print their IDs. 23 | 24 | This method fetches the list of models from the OpenAI API and prints the ID of each model. 25 | """ 26 | models: List[openai.Model] = self.client.models.list() 27 | for model in models: 28 | print(model.id) 29 | 30 | def transcribe_audio(self, file_path: str) -> str: 31 | """ 32 | Transcribe an audio file using the OpenAI API. 33 | 34 | This method reads an audio file and sends it to the OpenAI API for transcription using the 'whisper-1' model. 35 | 36 | Parameters 37 | ---------- 38 | file_path : str 39 | The path to the audio file to be transcribed. 40 | 41 | Returns 42 | ------- 43 | str 44 | The transcribed text from the audio file. 45 | """ 46 | with open(file_path, 'rb') as audio_file: 47 | response: openai.AudioTranscription = self.client.audio.transcriptions.create( 48 | model="whisper-1", 49 | file=audio_file 50 | ) 51 | return response.text -------------------------------------------------------------------------------- /prompts/comments.md: -------------------------------------------------------------------------------- 1 | You are tasked with adding comments to a piece of code to make it more understandable for AI systems or human developers. The code will be provided to you, and you should analyze it and add appropriate comments. 2 | 3 | ## Steps to Add Comments 4 | 5 | ### 1. Analyze the Code 6 | - Understand the structure and functionality of the code. 7 | 8 | ### 2. Identify Key Components 9 | - Functions 10 | - Loops 11 | - Conditionals 12 | - Any complex logic 13 | 14 | ### 3. Add Comments 15 | Explain the following: 16 | - The purpose of functions or code blocks 17 | - How complex algorithms or logic work 18 | - Any assumptions or limitations in the code 19 | - The meaning of important variables or data structures 20 | - Any potential edge cases or error handling 21 | 22 | ## Guidelines for Adding Comments 23 | 24 | ### Use Clear and Concise Language 25 | - Avoid stating the obvious (e.g., don’t just restate what the code does) 26 | - Focus on the “why” and “how” rather than just the “what” 27 | 28 | ### Comment Types 29 | - **Single-line comments**: For brief explanations 30 | - **Multi-line comments**: For longer explanations or function/class descriptions 31 | 32 | ### Use NumPy Comment Format for Methods 33 | - Provide a docstring in the NumPy format for each method. 34 | - Include sections for Parameters, Returns, and Examples if applicable. 35 | 36 | #### Example Format 37 | 38 | ```python 39 | def example_function(param1: int, param2: str) -> bool: 40 | """ 41 | Brief description of the function. 42 | Parameters 43 | ---------- 44 | param1 : int 45 | Description of the first parameter. 46 | param2 : str 47 | Description of the second parameter. 48 | Returns 49 | ------- 50 | bool 51 | Description of the return value. 52 | 53 | Examples (optional) 54 | -------- 55 | >>> example_function(10, 'test') 56 | True 57 | """ 58 | ``` 59 | 60 | ## Goal 61 | The goal is to make the code more understandable without changing its functionality. 
Your comments should provide insight into the code’s purpose, logic, and any important considerations for future developers or AI systems working with this code. -------------------------------------------------------------------------------- /docs/NOTION_SETUP.md: -------------------------------------------------------------------------------- 1 | # Notion Setup 2 | 3 | ## Create a Notion Integration 4 | 5 | 1. Navigate to the [Notion Integrations](https://www.notion.so/my-integrations) page. 6 | 2. Click on **New Integration**. 7 | 3. Enter a name for your integration and select the workspace where you want to use it. 8 | 4. Click **Submit** to create the integration. 9 | 5. Copy the **Internal Integration Token**. You will need this token to authenticate your application with Notion. 10 | 11 | ## Share a Database with Your Integration 12 | 13 | 1. Open the Notion page or database you want to integrate with. 14 | 2. Click on the **Share** button at the top-right corner of the page. 15 | 3. In the **Invite** field, search for the name of your integration and select it. 16 | 4. Click **Invite** to share the page or database with your integration. 17 | 18 | ## Configuration 19 | 20 | 1. **Create a `notion.yaml` file** 21 | 22 | - In the root of your project, create a `notion.yaml` file. 23 | - Add the following content to the `notion.yaml` file: 24 | 25 | ```yaml 26 | notion_token: YOUR_INTEGRATION_TOKEN 27 | database_id: YOUR_DATABASE_ID 28 | ``` 29 | 30 | Replace `YOUR_INTEGRATION_TOKEN` with the Internal Integration Token you copied earlier, and `YOUR_DATABASE_ID` with the ID of the Notion database you want to interact with. 31 | 32 | ## Usage 33 | 34 | 1. **Install Notion SDK** 35 | 36 | Ensure you have the Notion SDK installed. You can add it to your `requirements.txt` or install it directly using pip: 37 | 38 | ```bash 39 | pip install notion-client 40 | ``` 41 | 42 | 2. **Access Notion API in Your Code** 43 | 44 | Use the Notion SDK to interact with your Notion database. Here is a basic example in Python: 45 | 46 | ```python 47 | from notion_client import Client 48 | import yaml 49 | 50 | # Load Notion configuration 51 | with open('notion.yaml', 'r') as file: 52 | config = yaml.safe_load(file) 53 | 54 | notion = Client(auth=config['notion_token']) 55 | 56 | # Example: Retrieve a database 57 | database_id = config['database_id'] 58 | response = notion.databases.retrieve(database_id=database_id) 59 | print(response) 60 | ``` 61 | 62 | ## Troubleshooting 63 | 64 | - **Authentication Errors**: Ensure that the `notion_token` in your `notion.yaml` file is correct and that your integration has access to the database. 65 | - **API Limits**: Be aware of Notion API rate limits. Refer to the [Notion API documentation](https://developers.notion.com/docs/rate-limits) for more details. 66 | 67 | ## Additional Resources 68 | 69 | - [Notion API Documentation](https://developers.notion.com/) 70 | - [Notion SDK for Python](https://github.com/ramnes/notion-sdk-py) 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The AI Agent Watcher 2 | 3 | # ⚠️ **Warning:** This project is under heavy development and may be unstable. 4 | 5 | ## Description 6 | 7 | A Python-based application that monitors a specific folder in Google Drive for real-time changes. This watcher will notify you of any additions, deletions, or modifications within the designated folder. 
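The `watcher.py` entry point referenced under Usage below is not included in this snapshot, so the following is only a rough sketch of what such a polling loop could look like when built on this package's `GoogleDriveHandler`; the 30-second interval and the added/removed diffing are illustrative assumptions, not the project's actual implementation:

```python
import time
import yaml

from flow_watcher.drive import GoogleDriveHandler

# Load the watched folder ID and the credentials file name from the project config files.
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)
with open('auth/auth.yaml', 'r') as f:
    auth = yaml.safe_load(f)

handler = GoogleDriveHandler(credentials_path=f"auth/{auth['drive']}")
folder_id = config['drive_folder']

# Remember the files seen on the previous pass, then report differences on each poll.
known = {f['id']: f['name'] for f in handler.list_files_in_folder(folder_id)}
while True:
    time.sleep(30)  # assumed poll interval
    current = {f['id']: f['name'] for f in handler.list_files_in_folder(folder_id)}
    for file_id, name in current.items():
        if file_id not in known:
            print(f"Added: {name}")
    for file_id, name in known.items():
        if file_id not in current:
            print(f"Removed: {name}")
    known = current
```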
8 | 9 | ## Prerequisites 10 | 11 | - **Python 3.7 or higher** installed on your machine. 12 | - A **Google account** with access to Google Drive. 13 | - **Google Cloud Project** with Google Drive API enabled. 14 | - A **Notion account** with access to the Notion API. 15 | 16 | ## Installation 17 | 18 | 1. **Clone the Repository** 19 | 20 | ```bash 21 | git clone https://github.com/thibo73800/flow-watcher.git 22 | cd flow-watcher 23 | ``` 24 | 25 | 2. **Create a Virtual Environment (Optional but Recommended)** 26 | 27 | ```bash 28 | python3 -m venv venv 29 | source venv/bin/activate # On Windows use `venv\Scripts\activate` 30 | ``` 31 | 32 | 3. **Install Python Dependencies** 33 | 34 | ```bash 35 | pip install -r requirements.txt 36 | ``` 37 | 38 | ## Setup Google Drive API 39 | 40 | For detailed instructions on setting up the Google Drive API, please refer to the [Google Drive Setup Documentation](docs/GOOGLE_DRIVE_SETUP.md). 41 | 42 | ## Setup Notion API 43 | 44 | For detailed instructions on setting up the Notion API, please refer to the [Notion Setup Documentation](NOTION_SETUP.md). 45 | 46 | ## Configuration 47 | 48 | 1. **Specify the Folder to Watch** 49 | 50 | - Create a `config.yaml` file in the root of the project. 51 | - Obtain the **Folder ID** of the Google Drive folder you want to monitor. This can be extracted from the folder's URL. 52 | - Add the following content to the `config.yaml` file: 53 | 54 | ```yaml 55 | drive_folder: YOUR_FOLDER_ID 56 | ``` 57 | 58 | 2. **Specify Notion Configuration** 59 | 60 | - Create a `notion.yaml` file in the root of the project. 61 | - Add the following content to the `notion.yaml` file: 62 | 63 | ```yaml 64 | notion_token: YOUR_INTEGRATION_TOKEN 65 | database_id: YOUR_DATABASE_ID 66 | ``` 67 | 68 | ## Usage 69 | 70 | 1. **Run the Watcher** 71 | 72 | ```bash 73 | python watcher.py 74 | ``` 75 | 76 | 2. **Authorize Access** 77 | 78 | - On the first run, a browser window will prompt you to authorize the application to access your Google Drive. 79 | - Follow the on-screen instructions to grant permissions. 80 | 81 | 3. **Monitor Folder Changes** 82 | 83 | - The application will start monitoring the specified folder. 84 | - Changes such as file additions, deletions, or updates will be logged in the console. 85 | 86 | ## Troubleshooting 87 | 88 | - **Authentication Errors**: Ensure that the `credentials.json` file is correctly placed in the project root and that you've authorized the application. 89 | - **API Quotas**: Be mindful of Google Drive API usage limits. Monitor your usage in the Google Cloud Console to avoid exceeding quotas. 90 | 91 | ## Contributing 92 | 93 | Contributions are welcome! Please open an issue or submit a pull request for any enhancements or bug fixes. 
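As a companion to the Notion configuration above, here is a small sketch of pushing a page into the configured database with this package's `NotionAPI` wrapper; the title and content are placeholders, and the `notion.yaml` keys follow the Configuration section:

```python
import yaml

from flow_watcher.notion import NotionAPI

# Read the integration token and database ID created during the Notion setup.
with open('notion.yaml', 'r') as f:
    notion_cfg = yaml.safe_load(f)

notion = NotionAPI(api_key=notion_cfg['notion_token'],
                   database_id=notion_cfg['database_id'])

# Create a simple page in the configured database and print its ID.
page = notion.write_new_page(
    title="Flow Watcher smoke test",
    content="Created by the flow-watcher NotionAPI wrapper."
)
print(page['id'])
```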
94 | 95 | ## License 96 | 97 | [MIT License](LICENSE) -------------------------------------------------------------------------------- /flow_watcher/drive.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import time 4 | from google.auth.transport.requests import Request 5 | from google.oauth2.credentials import Credentials 6 | from google_auth_oauthlib.flow import InstalledAppFlow 7 | from googleapiclient.discovery import build 8 | from googleapiclient.http import MediaIoBaseDownload 9 | import io 10 | from typing import List, Optional, Dict 11 | 12 | from flow_watcher.utils import print_progress_bar 13 | 14 | 15 | class GoogleDriveHandler: 16 | """ 17 | A handler class for interacting with Google Drive API. 18 | """ 19 | # Define the scope for Google Drive API access 20 | SCOPES: List[str] = ['https://www.googleapis.com/auth/drive.readonly'] 21 | 22 | def __init__(self, credentials_path: str): 23 | """ 24 | Initialize the GoogleDriveHandler with the path to the credentials file. 25 | 26 | Parameters 27 | ---------- 28 | credentials_path : str 29 | Path to the credentials JSON file. 30 | """ 31 | self.credentials_path: str = credentials_path 32 | self.credentials: Credentials = self._get_credentials() 33 | # Build the Google Drive service object 34 | self.service = build('drive', 'v3', credentials=self.credentials) 35 | 36 | def _get_credentials(self) -> Credentials: 37 | """ 38 | Obtain user credentials for accessing Google Drive. 39 | 40 | Returns 41 | ------- 42 | Credentials 43 | The authenticated credentials for Google Drive API access. 44 | """ 45 | creds: Optional[Credentials] = None 46 | # Check if token.json file exists to load existing credentials 47 | if os.path.exists('token.json'): 48 | creds = Credentials.from_authorized_user_file('token.json', self.SCOPES) 49 | # If no valid credentials are available, prompt the user to log in 50 | if not creds or not creds.valid: 51 | if creds and creds.expired and creds.refresh_token: 52 | # Refresh the credentials if they are expired 53 | creds.refresh(Request()) 54 | else: 55 | # Run the OAuth flow to get new credentials 56 | flow = InstalledAppFlow.from_client_secrets_file(self.credentials_path, self.SCOPES) 57 | creds = flow.run_local_server(port=0) 58 | # Save the credentials for the next run 59 | with open('token.json', 'w') as token: 60 | token.write(creds.to_json()) 61 | return creds 62 | 63 | def list_files_in_folder(self, folder_id: str) -> List[Dict[str, str]]: 64 | """ 65 | List all files in a specified Google Drive folder. 66 | 67 | Parameters 68 | ---------- 69 | folder_id : str 70 | The ID of the Google Drive folder. 71 | 72 | Returns 73 | ------- 74 | List[Dict[str, str]] 75 | A list of dictionaries containing file IDs and names. 76 | """ 77 | query = f"'{folder_id}' in parents" 78 | # Execute the query to list files in the folder 79 | results = self.service.files().list(q=query, fields="files(id, name)").execute() 80 | return results.get('files', []) 81 | 82 | def download_file(self, file_id: str, output_path: str) -> None: 83 | """ 84 | Download a file from Google Drive given its file ID. 85 | 86 | Parameters 87 | ---------- 88 | file_id : str 89 | The ID of the file to be downloaded. 90 | output_path : str 91 | The local path where the downloaded file will be saved. 
92 | """ 93 | request = self.service.files().get_media(fileId=file_id) 94 | file = io.BytesIO() 95 | downloader = MediaIoBaseDownload(file, request) 96 | done: bool = False 97 | print("Downloading file:") 98 | # Download the file in chunks and show progress 99 | while not done: 100 | status, done = downloader.next_chunk() 101 | print_progress_bar(int(status.progress() * 100)) 102 | print("\nDownload complete!") 103 | # Write the downloaded file to the specified output path 104 | with open(output_path, "wb") as f: 105 | f.write(file.getvalue()) 106 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /sandbox/notion.ts: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | import { marked } from 'marked'; 3 | import { MarkdownConverter } from './markdown'; 4 | 5 | interface PageProperties { 6 | [key: string]: any; // Adjust this type based on the expected structure of page properties 7 | } 8 | 9 | interface Pages { 10 | context: { [key: string]: string }; 11 | profile: string; 12 | prompt: string[]; 13 | } 14 | 15 | class NotionAPI { 16 | notionToken: string; 17 | pageId: string | null; 18 | headers: { [key: string]: string }; 19 | depth2item_count: { [key: number]: number }; 20 | apiUrl: string; 21 | 22 | constructor(notionToken: string) { 23 | this.notionToken = notionToken; 24 | this.pageId = null; 25 | this.headers = { 26 | 'Authorization': `Bearer ${notionToken}`, 27 | 'Content-Type': 'application/json', 28 | 'Notion-Version': '2022-06-28' 29 | }; 30 | this.depth2item_count = {}; 31 | this.apiUrl = ''; 32 | } 33 | 34 | setPageId(pageId: string) { 35 | this.pageId = pageId; 36 | this.apiUrl = `https://api.notion.com/v1/blocks/${pageId}/children`; 37 | } 38 | 39 | async fetchPageContent(): Promise { 40 | try { 41 | const content = await this._fetchPageContentRecursive(this.pageId!, "Principle Page", true) as Pages; 42 | return content; 43 | } catch (error) { 44 | console.error('Error fetching Notion page content on page id: ', this.pageId, error); 45 | throw new Error('Failed to fetch Notion page content'); 46 | } 47 | } 48 | 49 | async _fetchPageProperties(pageId: string): Promise { 50 | try { 51 | const response = await axios.get(`https://api.notion.com/v1/pages/${pageId}`, { headers: this.headers }); 52 | return response.data.properties; // Return the properties of the page 53 | } catch (error) { 54 | console.error('Error fetching Notion page properties:', error); 55 | return null; // Return null if there's an error 56 | } 57 | } 58 | 59 | async _fetchPageContentRecursive( 60 | pageId: string, 61 | pageTitle: string, 62 | is_page = false, 63 | pages: Pages = { context: {}, profile: "You are a helpful assistant. Always answer in markdown.", prompt: [] }, 64 | pages_ids: string[] = [], 65 | depth = 1 66 | ): Promise { 67 | if (Object.keys(pages).length === 0) { 68 | pages = { 69 | context: {}, 70 | profile: "You are a helpful assistant. 
Always answer in markdown.", 71 | prompt: [] 72 | }; 73 | } 74 | 75 | const content: string[] = []; 76 | 77 | if (depth === 1) { 78 | this.depth2item_count = {}; 79 | } 80 | 81 | let page_type = "context"; 82 | if (is_page) { 83 | console.info("pageId", pageId); 84 | 85 | if (pages_ids.includes(pageId)) { 86 | console.warn("pageId already in pages_ids", pageId); 87 | return pages; 88 | } 89 | pages_ids.push(pageId); 90 | 91 | const pageProperties = await this._fetchPageProperties(pageId); 92 | if (pageProperties && pageProperties["FlowExtType"]) { 93 | console.log("FlowExtType", pageProperties["FlowExtType"]); 94 | if (pageProperties["FlowExtType"].rich_text.length > 0) { 95 | page_type = pageProperties["FlowExtType"].rich_text[0].plain_text; 96 | } 97 | } 98 | console.info("page_type", page_type); 99 | } 100 | 101 | let blocks: any[] = []; 102 | let startCursor: string | undefined = undefined; 103 | let hasMore = true; 104 | 105 | while (hasMore) { 106 | const response: any = await axios.get(`https://api.notion.com/v1/blocks/${pageId}/children`, { 107 | headers: this.headers, 108 | params: { 109 | start_cursor: startCursor, 110 | page_size: 100 111 | } 112 | }); 113 | 114 | blocks = blocks.concat(response.data.results); 115 | hasMore = response.data.has_more; 116 | startCursor = response.data.next_cursor; 117 | } 118 | 119 | 120 | for (const block of blocks) { 121 | if (block.type === 'table') { 122 | console.log("table loading", block); 123 | const tableContent = await this._handleTable(block); 124 | content.push(tableContent); 125 | } else if (block[block.type] && block[block.type].rich_text && Array.isArray(block[block.type].rich_text)) { 126 | let joined_text = await this._handleRichText(block, depth); 127 | content.push(joined_text); 128 | 129 | for (let i = 0; i < block[block.type].rich_text.length; i++) { 130 | let text = block[block.type].rich_text[i]; 131 | if (text && text.type === 'mention' && text.href) { 132 | const blockId = text.href.split('/')[text.href.split('/').length - 1]; 133 | pages = await this._fetchPageContentRecursive( 134 | blockId, "Attached href Page : " + text.plain_text, true, pages, pages_ids, 1) as Pages; 135 | } 136 | } 137 | 138 | if (block.has_children) { 139 | const subBlockContent = await this._fetchPageContentRecursive( 140 | block.id, "", false, pages, pages_ids, depth + 1) as string; 141 | content.push(subBlockContent); 142 | } 143 | } else if (block.type === 'column_list' || block.type === 'column') { 144 | console.log("column_list or column", block); 145 | } else if (block.type === 'divider') { 146 | content.push('------------------------------'); 147 | } else if (block.type === 'child_page') { 148 | console.info("href _fetchPageContentRecursive", block.id); 149 | pages = await this._fetchPageContentRecursive( 150 | block.id, "Attached Page: " + block.child_page.title, true, pages, pages_ids, 1) as Pages; 151 | } else { 152 | console.warn('Unexpected block type:', block); 153 | } 154 | } 155 | 156 | console.log("content", content); 157 | 158 | if (is_page && page_type === "context") { 159 | pages[page_type][pageTitle] = content.join('\n'); 160 | return pages; 161 | } else if (is_page && page_type === "prompt") { 162 | pages[page_type].push(content.join('\n')); 163 | return pages; 164 | } else if (is_page && page_type === "profile") { 165 | pages[page_type] = content.join('\n'); 166 | return pages; 167 | } else { 168 | return content.join('\n'); 169 | } 170 | } 171 | 172 | async _handleRichText(block: any, depth: number): Promise { 173 | if 
(block.type === 'numbered_list_item') { 174 | if (!this.depth2item_count[depth]) { 175 | this.depth2item_count[depth] = 1; 176 | } 177 | let prefix = '-'.repeat(depth); 178 | if (depth === 1) { 179 | prefix = ''; 180 | } 181 | let result = `${prefix} ${this.depth2item_count[depth]}. ${this._getRichTextContent(block)}`; 182 | this.depth2item_count[depth]++; 183 | return result; 184 | } else { 185 | this.depth2item_count[depth] = 1; 186 | } 187 | 188 | if (block.type === 'bulleted_list_item') { 189 | let bulletPrefix = '-'.repeat(depth); 190 | return `${bulletPrefix} ${this._getRichTextContent(block)}`; 191 | } 192 | 193 | switch (block.type) { 194 | case 'table': 195 | return this._handleTable(block); 196 | case 'heading_1': 197 | return `# ${this._getRichTextContent(block)}`; 198 | case 'heading_2': 199 | return `## ${this._getRichTextContent(block)}`; 200 | case 'heading_3': 201 | return `### ${this._getRichTextContent(block)}`; 202 | case 'paragraph': 203 | return this._getRichTextContent(block); 204 | case 'to_do': 205 | return `- [${block.to_do.checked ? 'x' : ' '}] ${this._getRichTextContent(block)}`; 206 | case 'toggle': 207 | return `
<details><summary>${this._getRichTextContent(block)}</summary></details>
`; 208 | case 'code': 209 | return `\`\`\`\n${this._getRichTextContent(block)}\n\`\`\``; 210 | case 'quote': 211 | return `> ${this._getRichTextContent(block)}`; 212 | case 'callout': 213 | return `> ${this._getRichTextContent(block)}`; 214 | case 'image': 215 | return `![Image](${block.image.file.url})`; 216 | case 'video': 217 | return `[Video](${block.video.file.url})`; 218 | case 'file': 219 | return `[File](${block.file.file.url})`; 220 | case 'pdf': 221 | return `[PDF](${block.pdf.file.url})`; 222 | case 'bookmark': 223 | return `[Bookmark](${block.bookmark.url})`; 224 | case 'embed': 225 | return `[Embed](${block.embed.url})`; 226 | case 'link_preview': 227 | return `[Link Preview](${block.link_preview.url})`; 228 | case 'column_list': 229 | case 'column': 230 | return ''; // Handled recursively 231 | case 'child_page': 232 | case 'child_database': 233 | return ''; // Ignored 234 | default: 235 | console.warn('Unexpected block type:', block); 236 | return ''; 237 | } 238 | } 239 | 240 | _getRichTextContent(block: any): string { 241 | let joined_text = ''; 242 | for (let i = 0; i < block[block.type].rich_text.length; i++) { 243 | let text = block[block.type].rich_text[i]; 244 | if (text) { 245 | if (text.type === 'text' && text.text.content) { 246 | joined_text += this._applyMarkdown(text.text.content, text.annotations) + ' '; 247 | } else if (text.type === 'mention' && text.plain_text) { 248 | joined_text += this._applyMarkdown(text.plain_text, text.annotations) + ' '; 249 | } 250 | } 251 | } 252 | return joined_text.trim(); 253 | } 254 | 255 | _formatTableRows(rows: any[], tableWidth: number, hasColumnHeader: boolean, hasRowHeader: boolean): string { 256 | let markdown = ''; 257 | let headerRow = ''; 258 | 259 | rows.forEach((row, rowIndex) => { 260 | if (row.type !== 'table_row') return; 261 | 262 | let rowContent = '|'; 263 | row.table_row.cells.forEach((cell: any[], cellIndex: number) => { 264 | const cellContent = cell.map(textObj => textObj.plain_text).join(' '); 265 | rowContent += ` ${cellContent} |`; 266 | 267 | if (rowIndex === 0 && hasColumnHeader) { 268 | headerRow += '| ' + '-'.repeat(cellContent.length) + ' '; 269 | } 270 | }); 271 | 272 | markdown += rowContent + '\n'; 273 | 274 | if (rowIndex === 0 && hasColumnHeader) { 275 | markdown += headerRow + '|\n'; 276 | } 277 | }); 278 | 279 | return markdown; 280 | } 281 | 282 | async _fetchTableRows(blockId: string): Promise { 283 | try { 284 | const response = await axios.get(`https://api.notion.com/v1/blocks/${blockId}/children`, { headers: this.headers }); 285 | return response.data.results; 286 | } catch (error) { 287 | console.error('Error fetching table rows:', error); 288 | return []; 289 | } 290 | } 291 | 292 | async _handleTable(block: any): Promise { 293 | const tableWidth = block.table.table_width; 294 | const hasColumnHeader = block.table.has_column_header; 295 | const hasRowHeader = block.table.has_row_header; 296 | 297 | const rows = await this._fetchTableRows(block.id); 298 | return this._formatTableRows(rows, tableWidth, hasColumnHeader, hasRowHeader); 299 | } 300 | 301 | _applyMarkdown(content: string, annotations: any): string { 302 | if (annotations.bold) content = `**${content}**`; 303 | if (annotations.italic) content = `*${content}*`; 304 | if (annotations.strikethrough) content = `~~${content}~~`; 305 | if (annotations.underline) content = `${content}`; 306 | if (annotations.code) content = `\`${content}\``; 307 | return content; 308 | } 309 | 310 | 311 | async _fetchPageTitle(pageId: string): 
Promise { 312 | try { 313 | const response = await axios.get(`https://api.notion.com/v1/pages/${pageId}`, { headers: this.headers }); 314 | const titleProperty = response.data.properties.title; 315 | if (titleProperty && titleProperty.title && titleProperty.title.length > 0) { 316 | return titleProperty.title[0].plain_text; 317 | } 318 | return "Untitled Sub-Page"; 319 | } catch (error) { 320 | console.error('Error fetching Notion page title:', error); 321 | return "Untitled Sub-Page"; 322 | } 323 | } 324 | 325 | async addMarkdownBlock(markdownContent: string): Promise { 326 | const htmlContent = marked.parse(markdownContent); // Use marked.parse 327 | const notionBlocks = MarkdownConverter.convertHtmlToNotionBlocks(htmlContent as string); 328 | 329 | try { 330 | const response = await axios.patch(this.apiUrl, { 331 | children: notionBlocks 332 | }, { 333 | headers: this.headers 334 | }); 335 | 336 | console.log('Notion API response:', response.data); 337 | return 'Markdown content added to Notion page!'; 338 | } catch (error) { 339 | console.error('Error adding block to Notion:', error as Error); // Cast error to Error type 340 | return 'Error writing to Notion: ' + (error instanceof Error ? error.message : 'Unknown error'); // Check if error is an instance of Error 341 | } 342 | } 343 | } 344 | 345 | export { NotionAPI }; -------------------------------------------------------------------------------- /flow_watcher/notion.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from typing import Any, Dict, Optional, List, Set 3 | import json 4 | 5 | class NotionAPI: 6 | """ 7 | A class to interact with the Notion API. 8 | 9 | Attributes 10 | ---------- 11 | api_key : str 12 | The API key for authenticating with the Notion API. 13 | database_id : str 14 | The ID of the Notion database to interact with. 15 | base_url : str 16 | The base URL for the Notion API. 17 | headers : Dict[str, str] 18 | The headers to include in API requests. 19 | """ 20 | def __init__(self, api_key: str, database_id: str) -> None: 21 | """ 22 | Initialize the NotionAPI class with the provided API key and database ID. 23 | 24 | Parameters 25 | ---------- 26 | api_key : str 27 | The API key for authenticating with the Notion API. 28 | database_id : str 29 | The ID of the Notion database to interact with. 30 | """ 31 | self.api_key = api_key 32 | self.database_id = database_id 33 | self.base_url = "https://api.notion.com/v1" 34 | self.headers = { 35 | "Authorization": f"Bearer {self.api_key}", 36 | "Content-Type": "application/json", 37 | "Notion-Version": "2022-06-28" 38 | } 39 | 40 | def retrieve_database_entry(self, entry_id: str) -> Dict[str, Any]: 41 | """ 42 | Retrieve a specific entry from the Notion database. 43 | 44 | Parameters 45 | ---------- 46 | entry_id : str 47 | The ID of the entry to retrieve. 48 | 49 | Returns 50 | ------- 51 | Dict[str, Any] 52 | The JSON response from the Notion API containing the entry details. 53 | """ 54 | url = f"{self.base_url}/pages/{entry_id}" 55 | response = requests.get(url, headers=self.headers) 56 | response.raise_for_status() 57 | return response.json() 58 | 59 | def write_new_page(self, title: str, content: str) -> Dict[str, Any]: 60 | """ 61 | Create a new page in the Notion database with the given title and content. 62 | 63 | Parameters 64 | ---------- 65 | title : str 66 | The title of the new page. 67 | content : str 68 | The content of the new page. 
69 | 70 | Returns 71 | ------- 72 | Dict[str, Any] 73 | The JSON response from the Notion API containing the new page details. 74 | """ 75 | url = f"{self.base_url}/pages" 76 | data = { 77 | "parent": {"database_id": self.database_id}, 78 | "properties": { 79 | "title": { 80 | "title": [{"text": {"content": title}}] 81 | } 82 | }, 83 | "children": [ 84 | { 85 | "object": "block", 86 | "type": "paragraph", 87 | "paragraph": { 88 | "rich_text": [{"type": "text", "text": {"content": content}}] 89 | } 90 | } 91 | ] 92 | } 93 | response = requests.post(url, headers=self.headers, json=data) 94 | response.raise_for_status() 95 | return response.json() 96 | 97 | def read_page_markdown(self, page_id: str) -> str: 98 | """ 99 | Retrieve the content of a Notion page and convert it to Markdown. 100 | 101 | Parameters 102 | ---------- 103 | page_id : str 104 | The ID of the page to retrieve. 105 | 106 | Returns 107 | ------- 108 | str 109 | The content of the page in Markdown format. 110 | """ 111 | processed_pages: Set[str] = set() 112 | markdown_content = self._fetch_page_content_recursive(page_id, processed_pages) 113 | return markdown_content 114 | 115 | def _fetch_page_content_recursive(self, page_id: str, processed_pages: Set[str]) -> str: 116 | """ 117 | Recursively fetch the content of a Notion page and its children. 118 | 119 | Parameters 120 | ---------- 121 | page_id : str 122 | The ID of the page to retrieve. 123 | processed_pages : Set[str] 124 | A set of page IDs that have already been processed to avoid recursion. 125 | 126 | Returns 127 | ------- 128 | str 129 | The content of the page and its children in Markdown format. 130 | """ 131 | if page_id in processed_pages: 132 | print(f"Page {page_id} already processed. Skipping to avoid recursion.") 133 | return "" 134 | processed_pages.add(page_id) 135 | 136 | url = f"{self.base_url}/blocks/{page_id}/children" 137 | response = requests.get(url, headers=self.headers) 138 | 139 | try: 140 | response.raise_for_status() 141 | except requests.exceptions.HTTPError as e: 142 | print(f"HTTP error occurred: {e}") 143 | print(f"Response content: {response.content}") 144 | return "" 145 | 146 | blocks = response.json().get('results', []) 147 | markdown_content = self._convert_blocks_to_markdown(blocks, processed_pages) 148 | return markdown_content 149 | 150 | def _convert_blocks_to_markdown(self, blocks: List[Dict[str, Any]], processed_pages: Set[str]) -> str: 151 | """ 152 | Convert Notion blocks to Markdown formatted text. 153 | 154 | Parameters 155 | ---------- 156 | blocks : List[Dict[str, Any]] 157 | A list of Notion blocks to convert. 158 | processed_pages : Set[str] 159 | A set of page IDs that have already been processed to avoid recursion. 160 | 161 | Returns 162 | ------- 163 | str 164 | The content of the blocks in Markdown format. 
165 | """ 166 | markdown_lines = [] 167 | for block in blocks: 168 | block_type = block.get('type') 169 | if not block_type: 170 | continue 171 | 172 | content = "" 173 | if block_type == 'paragraph': 174 | content = self._handle_paragraph(block['paragraph']) 175 | elif block_type == 'heading_1': 176 | content = self._handle_heading(block['heading_1'], level=1) 177 | elif block_type == 'heading_2': 178 | content = self._handle_heading(block['heading_2'], level=2) 179 | elif block_type == 'heading_3': 180 | content = self._handle_heading(block['heading_3'], level=3) 181 | elif block_type == 'bulleted_list_item': 182 | content = self._handle_bulleted_list_item(block['bulleted_list_item']) 183 | elif block_type == 'numbered_list_item': 184 | content = self._handle_numbered_list_item(block['numbered_list_item']) 185 | elif block_type == 'to_do': 186 | content = self._handle_to_do(block['to_do']) 187 | elif block_type == 'toggle': 188 | content = self._handle_toggle(block['toggle'], processed_pages) 189 | elif block_type == 'code': 190 | content = self._handle_code(block['code']) 191 | elif block_type == 'quote': 192 | content = self._handle_quote(block['quote']) 193 | elif block_type == 'divider': 194 | content = self._handle_divider() 195 | elif block_type == 'child_page': 196 | child_page = block.get('child_page', {}) 197 | child_page_id = child_page.get('page_id') # Updated from 'id' to 'page_id' 198 | child_page_title = child_page.get('title', 'Untitled Page') 199 | if child_page_id: 200 | child_markdown = self._fetch_page_content_recursive(child_page_id, processed_pages) 201 | content = f"\n### {child_page_title}\n\n{child_markdown}\n" 202 | else: 203 | print(f"Child page ID not found in block: {block}") 204 | else: 205 | print(f"Unhandled block type: {block_type}") 206 | continue 207 | 208 | if content: 209 | markdown_lines.append(content) 210 | 211 | return "\n\n".join(markdown_lines) 212 | 213 | def _handle_paragraph(self, paragraph: Dict[str, Any]) -> str: 214 | """ 215 | Handle paragraph blocks and convert them to Markdown. 216 | 217 | Parameters 218 | ---------- 219 | paragraph : Dict[str, Any] 220 | The paragraph block to handle. 221 | 222 | Returns 223 | ------- 224 | str 225 | The content of the paragraph in Markdown format. 226 | """ 227 | texts = paragraph.get('rich_text', []) 228 | return self._compose_text(texts) 229 | 230 | def _handle_heading(self, heading: Dict[str, Any], level: int) -> str: 231 | """ 232 | Handle heading blocks and convert them to Markdown. 233 | 234 | Parameters 235 | ---------- 236 | heading : Dict[str, Any] 237 | The heading block to handle. 238 | level : int 239 | The level of the heading (1, 2, or 3). 240 | 241 | Returns 242 | ------- 243 | str 244 | The content of the heading in Markdown format. 245 | """ 246 | texts = heading.get('rich_text', []) 247 | prefix = '#' * level 248 | return f"{prefix} {self._compose_text(texts)}" 249 | 250 | def _handle_bulleted_list_item(self, list_item: Dict[str, Any]) -> str: 251 | """ 252 | Handle bulleted list item blocks and convert them to Markdown. 253 | 254 | Parameters 255 | ---------- 256 | list_item : Dict[str, Any] 257 | The bulleted list item block to handle. 258 | 259 | Returns 260 | ------- 261 | str 262 | The content of the bulleted list item in Markdown format. 
263 | """ 264 | texts = list_item.get('rich_text', []) 265 | return f"- {self._compose_text(texts)}" 266 | 267 | def _handle_numbered_list_item(self, list_item: Dict[str, Any]) -> str: 268 | """ 269 | Handle numbered list item blocks and convert them to Markdown. 270 | 271 | Parameters 272 | ---------- 273 | list_item : Dict[str, Any] 274 | The numbered list item block to handle. 275 | 276 | Returns 277 | ------- 278 | str 279 | The content of the numbered list item in Markdown format. 280 | """ 281 | texts = list_item.get('rich_text', []) 282 | return f"1. {self._compose_text(texts)}" 283 | 284 | def _handle_to_do(self, to_do: Dict[str, Any]) -> str: 285 | """ 286 | Handle to-do blocks and convert them to Markdown. 287 | 288 | Parameters 289 | ---------- 290 | to_do : Dict[str, Any] 291 | The to-do block to handle. 292 | 293 | Returns 294 | ------- 295 | str 296 | The content of the to-do block in Markdown format. 297 | """ 298 | texts = to_do.get('rich_text', []) 299 | checked = to_do.get('checked', False) 300 | checkbox = "[x]" if checked else "[ ]" 301 | return f"- {checkbox} {self._compose_text(texts)}" 302 | 303 | def _handle_toggle(self, toggle: Dict[str, Any], processed_pages: Set[str]) -> str: 304 | """ 305 | Handle toggle blocks and convert them to Markdown. 306 | 307 | Parameters 308 | ---------- 309 | toggle : Dict[str, Any] 310 | The toggle block to handle. 311 | processed_pages : Set[str] 312 | A set of page IDs that have already been processed to avoid recursion. 313 | 314 | Returns 315 | ------- 316 | str 317 | The content of the toggle block in Markdown format. 318 | """ 319 | texts = toggle.get('rich_text', []) 320 | summary = self._compose_text(texts) 321 | # Fetch children of the toggle block 322 | toggle_id = toggle.get('id') 323 | if toggle_id: 324 | url = f"{self.base_url}/blocks/{toggle_id}/children" 325 | response = requests.get(url, headers=self.headers) 326 | try: 327 | response.raise_for_status() 328 | except requests.exceptions.HTTPError as e: 329 | print(f"HTTP error occurred while fetching toggle children: {e}") 330 | return f">
331 | blocks = response.json().get('results', []) 332 | nested_markdown = self._convert_blocks_to_markdown(blocks, processed_pages) 333 | return f"> {summary}\n\n{nested_markdown}\n"
334 | else: 335 | return f"> {summary}\n\n"
336 | 337 | def _handle_code(self, code: Dict[str, Any]) -> str: 338 | """ 339 | Handle code blocks and convert them to Markdown. 340 | 341 | Parameters 342 | ---------- 343 | code : Dict[str, Any] 344 | The code block to handle. 345 | 346 | Returns 347 | ------- 348 | str 349 | The content of the code block in Markdown format. 350 | """ 351 | language = code.get('language', '') 352 | content = code.get('rich_text', []) 353 | code_content = self._compose_text(content) 354 | return f"```{language}\n{code_content}\n```" 355 | 356 | def _handle_quote(self, quote: Dict[str, Any]) -> str: 357 | """ 358 | Handle quote blocks and convert them to Markdown. 359 | 360 | Parameters 361 | ---------- 362 | quote : Dict[str, Any] 363 | The quote block to handle. 364 | 365 | Returns 366 | ------- 367 | str 368 | The content of the quote block in Markdown format. 369 | """ 370 | texts = quote.get('rich_text', []) 371 | return f"> {self._compose_text(texts)}" 372 | 373 | def _handle_divider(self) -> str: 374 | """ 375 | Handle divider blocks and convert them to Markdown. 376 | 377 | Returns 378 | ------- 379 | str 380 | The Markdown representation of a divider. 381 | """ 382 | return "---" 383 | 384 | def _compose_text(self, texts: List[Dict[str, Any]]) -> str: 385 | """ 386 | Compose rich text objects into a single string with Markdown formatting. 387 | 388 | Parameters 389 | ---------- 390 | texts : List[Dict[str, Any]] 391 | A list of rich text objects to compose. 392 | 393 | Returns 394 | ------- 395 | str 396 | The composed text in Markdown format. 397 | """ 398 | composed = "" 399 | for text in texts: 400 | if text.get('type') == 'text': 401 | content = text['text']['content'] 402 | annotations = text.get('annotations', {}) 403 | content = self._apply_markdown_annotations(content, annotations) 404 | composed += content 405 | elif text.get('type') == 'mention': 406 | # Handle mentions if needed 407 | pass 408 | # Handle other text types as needed 409 | return composed 410 | 411 | def _apply_markdown_annotations(self, content: str, annotations: Dict[str, Any]) -> str: 412 | """ 413 | Apply Markdown formatting based on text annotations. 414 | 415 | Parameters 416 | ---------- 417 | content : str 418 | The text content to format. 419 | annotations : Dict[str, Any] 420 | The annotations to apply. 421 | 422 | Returns 423 | ------- 424 | str 425 | The formatted text. 426 | """ 427 | if annotations.get('bold'): 428 | content = f"**{content}**" 429 | if annotations.get('italic'): 430 | content = f"*{content}*" 431 | if annotations.get('underline'): 432 | content = f"{content}" # Markdown has no native underline syntax, so the text is left unchanged 433 | if annotations.get('strikethrough'): 434 | content = f"~~{content}~~" 435 | if annotations.get('code'): 436 | content = f"`{content}`" 437 | return content 438 | 439 | def write_markdown_to_page(self, page_id: str, markdown_content: str) -> Dict[str, Any]: 440 | """ 441 | Write Markdown content to a Notion page by converting it to Notion blocks. 442 | 443 | Parameters 444 | ---------- 445 | page_id : str 446 | The ID of the page to write to. 447 | markdown_content : str 448 | The Markdown content to write. 449 | 450 | Returns 451 | ------- 452 | Dict[str, Any] 453 | The JSON response from the Notion API containing the updated page details.
454 | """ 455 | blocks = self._convert_markdown_to_blocks(markdown_content) 456 | url = f"{self.base_url}/blocks/{page_id}/children" 457 | data = { 458 | "children": blocks 459 | } 460 | response = requests.patch(url, headers=self.headers, json=data) 461 | response.raise_for_status() 462 | return response.json() 463 | 464 | def _convert_markdown_to_blocks(self, markdown: str) -> List[Dict[str, Any]]: 465 | """ 466 | Convert Markdown text to Notion blocks. 467 | 468 | Parameters 469 | ---------- 470 | markdown : str 471 | The Markdown text to convert. 472 | 473 | Returns 474 | ------- 475 | List[Dict[str, Any]] 476 | A list of Notion blocks representing the Markdown content. 477 | """ 478 | lines = markdown.split('\n') 479 | blocks = [] 480 | for line in lines: 481 | if line.startswith('### '): 482 | blocks.append({ 483 | "object": "block", 484 | "type": "heading_3", 485 | "heading_3": { 486 | "rich_text": [{"type": "text", "text": {"content": line[4:]}}] 487 | } 488 | }) 489 | elif line.startswith('## '): 490 | blocks.append({ 491 | "object": "block", 492 | "type": "heading_2", 493 | "heading_2": { 494 | "rich_text": [{"type": "text", "text": {"content": line[3:]}}] 495 | } 496 | }) 497 | elif line.startswith('# '): 498 | blocks.append({ 499 | "object": "block", 500 | "type": "heading_1", 501 | "heading_1": { 502 | "rich_text": [{"type": "text", "text": {"content": line[2:]}}] 503 | } 504 | }) 505 | elif line.startswith('- [x] '): 506 | blocks.append({ 507 | "object": "block", 508 | "type": "to_do", 509 | "to_do": { 510 | "rich_text": [{"type": "text", "text": {"content": line[6:]}}], # the API expects 'rich_text' here, matching the other block types 511 | "checked": True 512 | } 513 | }) 514 | elif line.startswith('- [ ] '): 515 | blocks.append({ 516 | "object": "block", 517 | "type": "to_do", 518 | "to_do": { 519 | "rich_text": [{"type": "text", "text": {"content": line[6:]}}], 520 | "checked": False 521 | } 522 | }) 523 | elif line.startswith('- '): 524 | blocks.append({ 525 | "object": "block", 526 | "type": "bulleted_list_item", 527 | "bulleted_list_item": { 528 | "rich_text": [{"type": "text", "text": {"content": line[2:]}}] 529 | } 530 | }) 531 | elif line.startswith('1. '): 532 | blocks.append({ 533 | "object": "block", 534 | "type": "numbered_list_item", 535 | "numbered_list_item": { 536 | "rich_text": [{"type": "text", "text": {"content": line[3:]}}] 537 | } 538 | }) 539 | elif line.startswith('```'): 540 | language = line[3:].strip() # only the fence line is converted; the code body falls through to the paragraph branch below 541 | blocks.append({ 542 | "object": "block", 543 | "type": "code", 544 | "code": { 545 | "rich_text": [{"type": "text", "text": {"content": ""}}], 546 | "language": language or "plain text" # Notion requires a value from its supported language list 547 | } 548 | }) 549 | elif line.startswith('---') or line.startswith('***'): 550 | blocks.append({ 551 | "object": "block", 552 | "type": "divider", 553 | "divider": {} 554 | }) 555 | elif line.startswith('> '): 556 | blocks.append({ 557 | "object": "block", 558 | "type": "quote", 559 | "quote": { 560 | "rich_text": [{"type": "text", "text": {"content": line[2:]}}] 561 | } 562 | }) 563 | elif line.startswith('###'): 564 | # Handle details summary or other extended markdown syntax if needed 565 | blocks.append({ 566 | "object": "block", 567 | "type": "toggle", 568 | "toggle": { 569 | "rich_text": [{"type": "text", "text": {"content": line}}] 570 | } 571 | }) 572 | else: 573 | blocks.append({ 574 | "object": "block", 575 | "type": "paragraph", 576 | "paragraph": { 577 | "rich_text": [{"type": "text", "text": {"content": line}}] 578 | } 579 | }) 580 | return blocks --------------------------------------------------------------------------------
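Below is a minimal usage sketch of the Markdown round trip implemented in `flow_watcher/notion.py` above. It assumes the `NotionAPI` constructor takes a Notion API key and a database ID, as in `sandbox/notion_read.py`, and that `read_page_markdown()` and `write_markdown_to_page()` behave as defined in the file; the database and page IDs are placeholders, not values from the repository.

```python
# Sketch only: round-trip a Notion page through Markdown with flow_watcher.notion.
# Assumes NotionAPI(api_key, database_id), read_page_markdown(page_id) and
# write_markdown_to_page(page_id, markdown) as defined above, plus an
# auth/auth.yaml file containing a notion_api_key entry. All IDs are placeholders.
import yaml

from flow_watcher.notion import NotionAPI

with open('auth/auth.yaml', 'r') as auth_file:
    auth_config = yaml.safe_load(auth_file)

notion_api = NotionAPI(auth_config['notion_api_key'], 'YOUR_DATABASE_ID')

# Export a page (headings, lists, to-dos, toggles, code, quotes, dividers) to Markdown ...
markdown_content = notion_api.read_page_markdown('SOURCE_PAGE_ID')

# ... then convert that Markdown back into Notion blocks and append it to another page.
notion_api.write_markdown_to_page('TARGET_PAGE_ID', markdown_content)
```

Note that `_convert_markdown_to_blocks()` parses one line at a time, so nested structures and fenced code content are flattened on the write path; the round trip is lossy for those block types.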