├── flow_watcher ├── __init__.py ├── __pycache__ │ ├── drive.cpython-310.pyc │ ├── utils.cpython-310.pyc │ └── __init__.cpython-310.pyc ├── utils.py ├── oai.py ├── drive.py └── notion.py ├── requirements.txt ├── sandbox ├── tts.py ├── notion_read.py ├── list_and_download.py └── notion.ts ├── docs ├── GOOGLE_DRIVE_SETUP.md └── NOTION_SETUP.md ├── setup.py ├── prompts ├── typing.md └── comments.md ├── README.md └── LICENSE /flow_watcher/__init__.py: -------------------------------------------------------------------------------- 1 | from . import drive 2 | from . import utils 3 | from . import oai 4 | from . import notion -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client==2.70.0 2 | google-auth-httplib2==0.1.0 3 | google-auth-oauthlib==0.4.6 4 | openai 5 | -------------------------------------------------------------------------------- /flow_watcher/__pycache__/drive.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/flow-watcher/main/flow_watcher/__pycache__/drive.cpython-310.pyc -------------------------------------------------------------------------------- /flow_watcher/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/flow-watcher/main/flow_watcher/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /flow_watcher/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/flow-watcher/main/flow_watcher/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /flow_watcher/utils.py: -------------------------------------------------------------------------------- 1 | def print_progress_bar(progress): 2 | bar_length = 20 3 | filled_length = int(bar_length * progress // 100) 4 | bar = '█' * filled_length + '-' * (bar_length - filled_length) 5 | print(f'\r[{bar}] {progress}%', end='') -------------------------------------------------------------------------------- /sandbox/tts.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import os 3 | import yaml 4 | 5 | from flow_watcher import oai 6 | 7 | # Load configuration from config.yaml 8 | with open('config.yaml', 'r') as config_file: 9 | config = yaml.safe_load(config_file) 10 | 11 | # Main function 12 | def main(): 13 | model = oai.OAI(api_key=config['oai_key']) 14 | audio_file_path = 'downloads/Recording 162643-053024.mp3' 15 | transcribed_text = model.transcribe_audio(audio_file_path) 16 | print("\nTranscribed Text:", transcribed_text) 17 | 18 | if __name__ == "__main__": 19 | main() -------------------------------------------------------------------------------- /docs/GOOGLE_DRIVE_SETUP.md: -------------------------------------------------------------------------------- 1 | # Google Drive Setup 2 | 3 | ## Create a Google Cloud Project 4 | 5 | 1. Navigate to the [Google Cloud Console](https://console.cloud.google.com/). 6 | 2. Click on the project dropdown and select **New Project**. 7 | 3. Enter a project name and click **Create**. 8 | 9 | ## Enable Google Drive API 10 | 11 | 1. 
In the [APIs & Services Dashboard](https://console.cloud.google.com/apis/dashboard), click **Enable APIs and Services**. 12 | 2. Search for "Google Drive API" and select it. 13 | 3. Click **Enable**. 14 | 15 | ## Create OAuth Credentials 16 | 17 | 1. Go to **APIs & Services** > **Credentials**. 18 | 2. Click **Create Credentials** > **OAuth client ID**. 19 | 3. Select **Desktop app** and provide a name. 20 | 4. Click **Create** and download the `credentials.json` file. 21 | 5. Create a `auth` folder in the root of the project. 22 | 6. Place the `credentials.json` file in the `auth` folder. 23 | 7. Create a `auth.yaml` file in the `auth` folder. 24 | 8. Add the following content to the `auth.yaml` file: 25 | 26 | ```yaml 27 | drive: FILE_NAME 28 | ``` -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | # Read the contents of your README file 4 | from pathlib import Path 5 | this_directory = Path(__file__).parent 6 | long_description = (this_directory / "README.md").read_text() 7 | 8 | setup( 9 | name="flow_watcher", 10 | version="0.1.0", 11 | author="Your Name", 12 | author_email="your.email@example.com", 13 | description="A Python-based application that monitors a specific folder in Google Drive for real-time changes.", 14 | long_description=long_description, 15 | long_description_content_type="text/markdown", 16 | url="https://github.com/thibo73800/flow-watcher", 17 | packages=find_packages(), 18 | install_requires=[ 19 | "google-api-python-client==2.70.0", 20 | "google-auth-httplib2==0.1.0", 21 | "google-auth-oauthlib==0.4.6", 22 | "openai", 23 | "notion2markdown==0.2.0", 24 | "gtts" 25 | ], 26 | classifiers=[ 27 | "Programming Language :: Python :: 3", 28 | "License :: OSI Approved :: MIT License", 29 | "Operating System :: OS Independent", 30 | ], 31 | python_requires='>=3.7', 32 | ) -------------------------------------------------------------------------------- /sandbox/notion_read.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from flow_watcher.notion import NotionAPI 4 | 5 | def read_notion_page(): 6 | # Load configuration from YAML files 7 | with open('auth/auth.yaml', 'r') as auth_file: 8 | auth_config = yaml.safe_load(auth_file) 9 | 10 | with open('config.yaml', 'r') as config_file: 11 | config = yaml.safe_load(config_file) 12 | 13 | # Get Notion API key and database ID from the configuration 14 | notion_api_key = auth_config['notion_api_key'] 15 | notion_database_id = config['notion_database_id'] 16 | 17 | # Initialize NotionAPI 18 | notion_api = NotionAPI(notion_api_key, notion_database_id) 19 | 20 | # Specify the page ID you want to read 21 | page_id = config['notion_page_id'] 22 | 23 | # Read the page as markdown 24 | markdown_content = notion_api.read_page_markdown(page_id) 25 | 26 | print(markdown_content) 27 | 28 | return 29 | 30 | # Create a directory to store the markdown file 31 | os.makedirs('notion_exports', exist_ok=True) 32 | 33 | # Save the markdown content to a file 34 | output_file = os.path.join('notion_exports', f'{page_id}.md') 35 | with open(output_file, 'w', encoding='utf-8') as f: 36 | f.write(markdown_content) 37 | 38 | print(f"Page content has been exported to {output_file}") 39 | 40 | if __name__ == '__main__': 41 | read_notion_page() 
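Note: the key names that `notion_read.py` expects in its two YAML files are taken directly from the script above; the values below are placeholders to fill in, not values shipped with the project:

```yaml
# auth/auth.yaml
notion_api_key: YOUR_NOTION_INTEGRATION_TOKEN

# config.yaml
notion_database_id: YOUR_DATABASE_ID
notion_page_id: YOUR_PAGE_ID
```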
-------------------------------------------------------------------------------- /sandbox/list_and_download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from flow_watcher.drive import GoogleDriveHandler 4 | import io 5 | 6 | 7 | def list_and_downoad(): 8 | # Load configuration from YAML files 9 | with open('auth/auth.yaml', 'r') as auth_file: 10 | auth_config = yaml.safe_load(auth_file) 11 | 12 | with open('config.yaml', 'r') as config_file: 13 | config = yaml.safe_load(config_file) 14 | 15 | # Get credentials and folder ID from the configuration 16 | credentials_path = os.path.join('auth', auth_config['drive']) 17 | folder_id = config['drive_folder'] 18 | 19 | drive_handler = GoogleDriveHandler(credentials_path) 20 | files = drive_handler.list_files_in_folder(folder_id) 21 | 22 | if files: 23 | first_file = files[0] 24 | output_path = os.path.join('downloads', first_file['name']) 25 | os.makedirs('downloads', exist_ok=True) 26 | 27 | # Check if the file already exists before downloading 28 | if os.path.exists(output_path): 29 | print(f"File '{first_file['name']}' already exists. Skipping download.") 30 | else: 31 | print(f"Downloading: {first_file['name']}") 32 | drive_handler.download_file(first_file['id'], output_path) 33 | else: 34 | print("No files found in the specified folder.") 35 | 36 | if __name__ == '__main__': 37 | list_and_downoad() 38 | 39 | 40 | -------------------------------------------------------------------------------- /prompts/typing.md: -------------------------------------------------------------------------------- 1 | You are tasked with ensuring that a Python script is properly using typing to enhance code clarity and maintainability. The script will be provided to you, and you should analyze it and add appropriate type hints. 2 | 3 | ## Steps to Add Type Hints 4 | 5 | ### 1. Analyze the Code 6 | - Understand the structure and functionality of the code. 7 | - Identify the data types used in variables, function parameters, and return values. 8 | 9 | ### 2. Identify Key Components 10 | - Functions and methods 11 | - Variables and constants 12 | - Class attributes 13 | - Complex data structures (e.g., lists, dictionaries, tuples) 14 | 15 | ### 3. Add Type Hints 16 | - Add type hints to function parameters and return values. 17 | - Use type hints for variables and class attributes where applicable. 18 | - Utilize `typing` module for complex types (e.g., `List`, `Dict`, `Tuple`, `Optional`). 19 | 20 | ## Guidelines for Adding Type Hints 21 | 22 | ### Use Clear and Concise Type Annotations 23 | - Ensure type hints are accurate and reflect the actual data types used. 24 | - Use `Optional` for parameters that can be `None`. 25 | - Use `Union` for parameters that can be of multiple types. 26 | - Use `Any` sparingly, only when the type cannot be determined. 27 | 28 | ### Type Hint Examples 29 | - **Basic Types**: `int`, `str`, `float`, `bool` 30 | - **Complex Types**: `List[int]`, `Dict[str, Any]`, `Tuple[int, str]` 31 | - **Optional Types**: `Optional[int]` 32 | - **Union Types**: `Union[int, str]` 33 | 34 | -------------------------------------------------------------------------------- /flow_watcher/oai.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from typing import List 3 | 4 | class OAI: 5 | """ 6 | A class to interact with the OpenAI API for various functionalities such as listing models and transcribing audio. 
7 | """ 8 | 9 | def __init__(self, api_key: str) -> None: 10 | """ 11 | Initialize the OAI class with an API key. 12 | 13 | Parameters 14 | ---------- 15 | api_key : str 16 | The API key to authenticate with the OpenAI API. 17 | """ 18 | self.client = openai.OpenAI(api_key=api_key) 19 | 20 | def list_models(self) -> None: 21 | """ 22 | List all available models from the OpenAI API and print their IDs. 23 | 24 | This method fetches the list of models from the OpenAI API and prints the ID of each model. 25 | """ 26 | models: List[openai.Model] = self.client.models.list() 27 | for model in models: 28 | print(model.id) 29 | 30 | def transcribe_audio(self, file_path: str) -> str: 31 | """ 32 | Transcribe an audio file using the OpenAI API. 33 | 34 | This method reads an audio file and sends it to the OpenAI API for transcription using the 'whisper-1' model. 35 | 36 | Parameters 37 | ---------- 38 | file_path : str 39 | The path to the audio file to be transcribed. 40 | 41 | Returns 42 | ------- 43 | str 44 | The transcribed text from the audio file. 45 | """ 46 | with open(file_path, 'rb') as audio_file: 47 | response: openai.AudioTranscription = self.client.audio.transcriptions.create( 48 | model="whisper-1", 49 | file=audio_file 50 | ) 51 | return response.text -------------------------------------------------------------------------------- /prompts/comments.md: -------------------------------------------------------------------------------- 1 | You are tasked with adding comments to a piece of code to make it more understandable for AI systems or human developers. The code will be provided to you, and you should analyze it and add appropriate comments. 2 | 3 | ## Steps to Add Comments 4 | 5 | ### 1. Analyze the Code 6 | - Understand the structure and functionality of the code. 7 | 8 | ### 2. Identify Key Components 9 | - Functions 10 | - Loops 11 | - Conditionals 12 | - Any complex logic 13 | 14 | ### 3. Add Comments 15 | Explain the following: 16 | - The purpose of functions or code blocks 17 | - How complex algorithms or logic work 18 | - Any assumptions or limitations in the code 19 | - The meaning of important variables or data structures 20 | - Any potential edge cases or error handling 21 | 22 | ## Guidelines for Adding Comments 23 | 24 | ### Use Clear and Concise Language 25 | - Avoid stating the obvious (e.g., don’t just restate what the code does) 26 | - Focus on the “why” and “how” rather than just the “what” 27 | 28 | ### Comment Types 29 | - **Single-line comments**: For brief explanations 30 | - **Multi-line comments**: For longer explanations or function/class descriptions 31 | 32 | ### Use NumPy Comment Format for Methods 33 | - Provide a docstring in the NumPy format for each method. 34 | - Include sections for Parameters, Returns, and Examples if applicable. 35 | 36 | #### Example Format 37 | 38 | ```python 39 | def example_function(param1: int, param2: str) -> bool: 40 | """ 41 | Brief description of the function. 42 | Parameters 43 | ---------- 44 | param1 : int 45 | Description of the first parameter. 46 | param2 : str 47 | Description of the second parameter. 48 | Returns 49 | ------- 50 | bool 51 | Description of the return value. 52 | 53 | Examples (optional) 54 | -------- 55 | >>> example_function(10, 'test') 56 | True 57 | """ 58 | ``` 59 | 60 | ## Goal 61 | The goal is to make the code more understandable without changing its functionality. 
Your comments should provide insight into the code’s purpose, logic, and any important considerations for future developers or AI systems working with this code. -------------------------------------------------------------------------------- /docs/NOTION_SETUP.md: -------------------------------------------------------------------------------- 1 | # Notion Setup 2 | 3 | ## Create a Notion Integration 4 | 5 | 1. Navigate to the [Notion Integrations](https://www.notion.so/my-integrations) page. 6 | 2. Click on **New Integration**. 7 | 3. Enter a name for your integration and select the workspace where you want to use it. 8 | 4. Click **Submit** to create the integration. 9 | 5. Copy the **Internal Integration Token**. You will need this token to authenticate your application with Notion. 10 | 11 | ## Share a Database with Your Integration 12 | 13 | 1. Open the Notion page or database you want to integrate with. 14 | 2. Click on the **Share** button at the top-right corner of the page. 15 | 3. In the **Invite** field, search for the name of your integration and select it. 16 | 4. Click **Invite** to share the page or database with your integration. 17 | 18 | ## Configuration 19 | 20 | 1. **Create a `notion.yaml` file** 21 | 22 | - In the root of your project, create a `notion.yaml` file. 23 | - Add the following content to the `notion.yaml` file: 24 | 25 | ```yaml 26 | notion_token: YOUR_INTEGRATION_TOKEN 27 | database_id: YOUR_DATABASE_ID 28 | ``` 29 | 30 | Replace `YOUR_INTEGRATION_TOKEN` with the Internal Integration Token you copied earlier, and `YOUR_DATABASE_ID` with the ID of the Notion database you want to interact with. 31 | 32 | ## Usage 33 | 34 | 1. **Install Notion SDK** 35 | 36 | Ensure you have the Notion SDK installed. You can add it to your `requirements.txt` or install it directly using pip: 37 | 38 | ```bash 39 | pip install notion-client 40 | ``` 41 | 42 | 2. **Access Notion API in Your Code** 43 | 44 | Use the Notion SDK to interact with your Notion database. Here is a basic example in Python: 45 | 46 | ```python 47 | from notion_client import Client 48 | import yaml 49 | 50 | # Load Notion configuration 51 | with open('notion.yaml', 'r') as file: 52 | config = yaml.safe_load(file) 53 | 54 | notion = Client(auth=config['notion_token']) 55 | 56 | # Example: Retrieve a database 57 | database_id = config['database_id'] 58 | response = notion.databases.retrieve(database_id=database_id) 59 | print(response) 60 | ``` 61 | 62 | ## Troubleshooting 63 | 64 | - **Authentication Errors**: Ensure that the `notion_token` in your `notion.yaml` file is correct and that your integration has access to the database. 65 | - **API Limits**: Be aware of Notion API rate limits. Refer to the [Notion API documentation](https://developers.notion.com/docs/rate-limits) for more details. 66 | 67 | ## Additional Resources 68 | 69 | - [Notion API Documentation](https://developers.notion.com/) 70 | - [Notion SDK for Python](https://github.com/ramnes/notion-sdk-py) 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The AI Agent Watcher 2 | 3 | # ⚠️ **Warning:** This project is under heavy development and may be unstable. 4 | 5 | ## Description 6 | 7 | A Python-based application that monitors a specific folder in Google Drive for real-time changes. This watcher will notify you of any additions, deletions, or modifications within the designated folder. 
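The `watcher.py` entry point referenced under Usage below is not included in this snapshot, so the following is only a rough sketch of what such a polling loop could look like when built on this package's `GoogleDriveHandler`; the 30-second interval and the added/removed diffing are illustrative assumptions, not the project's actual implementation:

```python
import time
import yaml

from flow_watcher.drive import GoogleDriveHandler

# Load the watched folder ID and the credentials file name from the project config files.
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)
with open('auth/auth.yaml', 'r') as f:
    auth = yaml.safe_load(f)

handler = GoogleDriveHandler(credentials_path=f"auth/{auth['drive']}")
folder_id = config['drive_folder']

# Remember the files seen on the previous pass, then report differences on each poll.
known = {f['id']: f['name'] for f in handler.list_files_in_folder(folder_id)}
while True:
    time.sleep(30)  # assumed poll interval
    current = {f['id']: f['name'] for f in handler.list_files_in_folder(folder_id)}
    for file_id, name in current.items():
        if file_id not in known:
            print(f"Added: {name}")
    for file_id, name in known.items():
        if file_id not in current:
            print(f"Removed: {name}")
    known = current
```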
8 | 9 | ## Prerequisites 10 | 11 | - **Python 3.7 or higher** installed on your machine. 12 | - A **Google account** with access to Google Drive. 13 | - **Google Cloud Project** with Google Drive API enabled. 14 | - A **Notion account** with access to the Notion API. 15 | 16 | ## Installation 17 | 18 | 1. **Clone the Repository** 19 | 20 | ```bash 21 | git clone https://github.com/thibo73800/flow-watcher.git 22 | cd flow-watcher 23 | ``` 24 | 25 | 2. **Create a Virtual Environment (Optional but Recommended)** 26 | 27 | ```bash 28 | python3 -m venv venv 29 | source venv/bin/activate # On Windows use `venv\Scripts\activate` 30 | ``` 31 | 32 | 3. **Install Python Dependencies** 33 | 34 | ```bash 35 | pip install -r requirements.txt 36 | ``` 37 | 38 | ## Setup Google Drive API 39 | 40 | For detailed instructions on setting up the Google Drive API, please refer to the [Google Drive Setup Documentation](docs/GOOGLE_DRIVE_SETUP.md). 41 | 42 | ## Setup Notion API 43 | 44 | For detailed instructions on setting up the Notion API, please refer to the [Notion Setup Documentation](NOTION_SETUP.md). 45 | 46 | ## Configuration 47 | 48 | 1. **Specify the Folder to Watch** 49 | 50 | - Create a `config.yaml` file in the root of the project. 51 | - Obtain the **Folder ID** of the Google Drive folder you want to monitor. This can be extracted from the folder's URL. 52 | - Add the following content to the `config.yaml` file: 53 | 54 | ```yaml 55 | drive_folder: YOUR_FOLDER_ID 56 | ``` 57 | 58 | 2. **Specify Notion Configuration** 59 | 60 | - Create a `notion.yaml` file in the root of the project. 61 | - Add the following content to the `notion.yaml` file: 62 | 63 | ```yaml 64 | notion_token: YOUR_INTEGRATION_TOKEN 65 | database_id: YOUR_DATABASE_ID 66 | ``` 67 | 68 | ## Usage 69 | 70 | 1. **Run the Watcher** 71 | 72 | ```bash 73 | python watcher.py 74 | ``` 75 | 76 | 2. **Authorize Access** 77 | 78 | - On the first run, a browser window will prompt you to authorize the application to access your Google Drive. 79 | - Follow the on-screen instructions to grant permissions. 80 | 81 | 3. **Monitor Folder Changes** 82 | 83 | - The application will start monitoring the specified folder. 84 | - Changes such as file additions, deletions, or updates will be logged in the console. 85 | 86 | ## Troubleshooting 87 | 88 | - **Authentication Errors**: Ensure that the `credentials.json` file is correctly placed in the project root and that you've authorized the application. 89 | - **API Quotas**: Be mindful of Google Drive API usage limits. Monitor your usage in the Google Cloud Console to avoid exceeding quotas. 90 | 91 | ## Contributing 92 | 93 | Contributions are welcome! Please open an issue or submit a pull request for any enhancements or bug fixes. 
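As a companion to the Notion configuration above, here is a small sketch of pushing a page into the configured database with this package's `NotionAPI` wrapper; the title and content are placeholders, and the `notion.yaml` keys follow the Configuration section:

```python
import yaml

from flow_watcher.notion import NotionAPI

# Read the integration token and database ID created during the Notion setup.
with open('notion.yaml', 'r') as f:
    notion_cfg = yaml.safe_load(f)

notion = NotionAPI(api_key=notion_cfg['notion_token'],
                   database_id=notion_cfg['database_id'])

# Create a simple page in the configured database and print its ID.
page = notion.write_new_page(
    title="Flow Watcher smoke test",
    content="Created by the flow-watcher NotionAPI wrapper."
)
print(page['id'])
```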
94 | 95 | ## License 96 | 97 | [MIT License](LICENSE) -------------------------------------------------------------------------------- /flow_watcher/drive.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import time 4 | from google.auth.transport.requests import Request 5 | from google.oauth2.credentials import Credentials 6 | from google_auth_oauthlib.flow import InstalledAppFlow 7 | from googleapiclient.discovery import build 8 | from googleapiclient.http import MediaIoBaseDownload 9 | import io 10 | from typing import List, Optional, Dict 11 | 12 | from flow_watcher.utils import print_progress_bar 13 | 14 | 15 | class GoogleDriveHandler: 16 | """ 17 | A handler class for interacting with Google Drive API. 18 | """ 19 | # Define the scope for Google Drive API access 20 | SCOPES: List[str] = ['https://www.googleapis.com/auth/drive.readonly'] 21 | 22 | def __init__(self, credentials_path: str): 23 | """ 24 | Initialize the GoogleDriveHandler with the path to the credentials file. 25 | 26 | Parameters 27 | ---------- 28 | credentials_path : str 29 | Path to the credentials JSON file. 30 | """ 31 | self.credentials_path: str = credentials_path 32 | self.credentials: Credentials = self._get_credentials() 33 | # Build the Google Drive service object 34 | self.service = build('drive', 'v3', credentials=self.credentials) 35 | 36 | def _get_credentials(self) -> Credentials: 37 | """ 38 | Obtain user credentials for accessing Google Drive. 39 | 40 | Returns 41 | ------- 42 | Credentials 43 | The authenticated credentials for Google Drive API access. 44 | """ 45 | creds: Optional[Credentials] = None 46 | # Check if token.json file exists to load existing credentials 47 | if os.path.exists('token.json'): 48 | creds = Credentials.from_authorized_user_file('token.json', self.SCOPES) 49 | # If no valid credentials are available, prompt the user to log in 50 | if not creds or not creds.valid: 51 | if creds and creds.expired and creds.refresh_token: 52 | # Refresh the credentials if they are expired 53 | creds.refresh(Request()) 54 | else: 55 | # Run the OAuth flow to get new credentials 56 | flow = InstalledAppFlow.from_client_secrets_file(self.credentials_path, self.SCOPES) 57 | creds = flow.run_local_server(port=0) 58 | # Save the credentials for the next run 59 | with open('token.json', 'w') as token: 60 | token.write(creds.to_json()) 61 | return creds 62 | 63 | def list_files_in_folder(self, folder_id: str) -> List[Dict[str, str]]: 64 | """ 65 | List all files in a specified Google Drive folder. 66 | 67 | Parameters 68 | ---------- 69 | folder_id : str 70 | The ID of the Google Drive folder. 71 | 72 | Returns 73 | ------- 74 | List[Dict[str, str]] 75 | A list of dictionaries containing file IDs and names. 76 | """ 77 | query = f"'{folder_id}' in parents" 78 | # Execute the query to list files in the folder 79 | results = self.service.files().list(q=query, fields="files(id, name)").execute() 80 | return results.get('files', []) 81 | 82 | def download_file(self, file_id: str, output_path: str) -> None: 83 | """ 84 | Download a file from Google Drive given its file ID. 85 | 86 | Parameters 87 | ---------- 88 | file_id : str 89 | The ID of the file to be downloaded. 90 | output_path : str 91 | The local path where the downloaded file will be saved. 
92 | """ 93 | request = self.service.files().get_media(fileId=file_id) 94 | file = io.BytesIO() 95 | downloader = MediaIoBaseDownload(file, request) 96 | done: bool = False 97 | print("Downloading file:") 98 | # Download the file in chunks and show progress 99 | while not done: 100 | status, done = downloader.next_chunk() 101 | print_progress_bar(int(status.progress() * 100)) 102 | print("\nDownload complete!") 103 | # Write the downloaded file to the specified output path 104 | with open(output_path, "wb") as f: 105 | f.write(file.getvalue()) 106 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /sandbox/notion.ts: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | import { marked } from 'marked'; 3 | import { MarkdownConverter } from './markdown'; 4 | 5 | interface PageProperties { 6 | [key: string]: any; // Adjust this type based on the expected structure of page properties 7 | } 8 | 9 | interface Pages { 10 | context: { [key: string]: string }; 11 | profile: string; 12 | prompt: string[]; 13 | } 14 | 15 | class NotionAPI { 16 | notionToken: string; 17 | pageId: string | null; 18 | headers: { [key: string]: string }; 19 | depth2item_count: { [key: number]: number }; 20 | apiUrl: string; 21 | 22 | constructor(notionToken: string) { 23 | this.notionToken = notionToken; 24 | this.pageId = null; 25 | this.headers = { 26 | 'Authorization': `Bearer ${notionToken}`, 27 | 'Content-Type': 'application/json', 28 | 'Notion-Version': '2022-06-28' 29 | }; 30 | this.depth2item_count = {}; 31 | this.apiUrl = ''; 32 | } 33 | 34 | setPageId(pageId: string) { 35 | this.pageId = pageId; 36 | this.apiUrl = `https://api.notion.com/v1/blocks/${pageId}/children`; 37 | } 38 | 39 | async fetchPageContent(): Promise { 40 | try { 41 | const content = await this._fetchPageContentRecursive(this.pageId!, "Principle Page", true) as Pages; 42 | return content; 43 | } catch (error) { 44 | console.error('Error fetching Notion page content on page id: ', this.pageId, error); 45 | throw new Error('Failed to fetch Notion page content'); 46 | } 47 | } 48 | 49 | async _fetchPageProperties(pageId: string): Promise { 50 | try { 51 | const response = await axios.get(`https://api.notion.com/v1/pages/${pageId}`, { headers: this.headers }); 52 | return response.data.properties; // Return the properties of the page 53 | } catch (error) { 54 | console.error('Error fetching Notion page properties:', error); 55 | return null; // Return null if there's an error 56 | } 57 | } 58 | 59 | async _fetchPageContentRecursive( 60 | pageId: string, 61 | pageTitle: string, 62 | is_page = false, 63 | pages: Pages = { context: {}, profile: "You are a helpful assistant. Always answer in markdown.", prompt: [] }, 64 | pages_ids: string[] = [], 65 | depth = 1 66 | ): Promise { 67 | if (Object.keys(pages).length === 0) { 68 | pages = { 69 | context: {}, 70 | profile: "You are a helpful assistant. 
Always answer in markdown.", 71 | prompt: [] 72 | }; 73 | } 74 | 75 | const content: string[] = []; 76 | 77 | if (depth === 1) { 78 | this.depth2item_count = {}; 79 | } 80 | 81 | let page_type = "context"; 82 | if (is_page) { 83 | console.info("pageId", pageId); 84 | 85 | if (pages_ids.includes(pageId)) { 86 | console.warn("pageId already in pages_ids", pageId); 87 | return pages; 88 | } 89 | pages_ids.push(pageId); 90 | 91 | const pageProperties = await this._fetchPageProperties(pageId); 92 | if (pageProperties && pageProperties["FlowExtType"]) { 93 | console.log("FlowExtType", pageProperties["FlowExtType"]); 94 | if (pageProperties["FlowExtType"].rich_text.length > 0) { 95 | page_type = pageProperties["FlowExtType"].rich_text[0].plain_text; 96 | } 97 | } 98 | console.info("page_type", page_type); 99 | } 100 | 101 | let blocks: any[] = []; 102 | let startCursor: string | undefined = undefined; 103 | let hasMore = true; 104 | 105 | while (hasMore) { 106 | const response: any = await axios.get(`https://api.notion.com/v1/blocks/${pageId}/children`, { 107 | headers: this.headers, 108 | params: { 109 | start_cursor: startCursor, 110 | page_size: 100 111 | } 112 | }); 113 | 114 | blocks = blocks.concat(response.data.results); 115 | hasMore = response.data.has_more; 116 | startCursor = response.data.next_cursor; 117 | } 118 | 119 | 120 | for (const block of blocks) { 121 | if (block.type === 'table') { 122 | console.log("table loading", block); 123 | const tableContent = await this._handleTable(block); 124 | content.push(tableContent); 125 | } else if (block[block.type] && block[block.type].rich_text && Array.isArray(block[block.type].rich_text)) { 126 | let joined_text = await this._handleRichText(block, depth); 127 | content.push(joined_text); 128 | 129 | for (let i = 0; i < block[block.type].rich_text.length; i++) { 130 | let text = block[block.type].rich_text[i]; 131 | if (text && text.type === 'mention' && text.href) { 132 | const blockId = text.href.split('/')[text.href.split('/').length - 1]; 133 | pages = await this._fetchPageContentRecursive( 134 | blockId, "Attached href Page : " + text.plain_text, true, pages, pages_ids, 1) as Pages; 135 | } 136 | } 137 | 138 | if (block.has_children) { 139 | const subBlockContent = await this._fetchPageContentRecursive( 140 | block.id, "", false, pages, pages_ids, depth + 1) as string; 141 | content.push(subBlockContent); 142 | } 143 | } else if (block.type === 'column_list' || block.type === 'column') { 144 | console.log("column_list or column", block); 145 | } else if (block.type === 'divider') { 146 | content.push('------------------------------'); 147 | } else if (block.type === 'child_page') { 148 | console.info("href _fetchPageContentRecursive", block.id); 149 | pages = await this._fetchPageContentRecursive( 150 | block.id, "Attached Page: " + block.child_page.title, true, pages, pages_ids, 1) as Pages; 151 | } else { 152 | console.warn('Unexpected block type:', block); 153 | } 154 | } 155 | 156 | console.log("content", content); 157 | 158 | if (is_page && page_type === "context") { 159 | pages[page_type][pageTitle] = content.join('\n'); 160 | return pages; 161 | } else if (is_page && page_type === "prompt") { 162 | pages[page_type].push(content.join('\n')); 163 | return pages; 164 | } else if (is_page && page_type === "profile") { 165 | pages[page_type] = content.join('\n'); 166 | return pages; 167 | } else { 168 | return content.join('\n'); 169 | } 170 | } 171 | 172 | async _handleRichText(block: any, depth: number): Promise { 173 | if 
(block.type === 'numbered_list_item') { 174 | if (!this.depth2item_count[depth]) { 175 | this.depth2item_count[depth] = 1; 176 | } 177 | let prefix = '-'.repeat(depth); 178 | if (depth === 1) { 179 | prefix = ''; 180 | } 181 | let result = `${prefix} ${this.depth2item_count[depth]}. ${this._getRichTextContent(block)}`; 182 | this.depth2item_count[depth]++; 183 | return result; 184 | } else { 185 | this.depth2item_count[depth] = 1; 186 | } 187 | 188 | if (block.type === 'bulleted_list_item') { 189 | let bulletPrefix = '-'.repeat(depth); 190 | return `${bulletPrefix} ${this._getRichTextContent(block)}`; 191 | } 192 | 193 | switch (block.type) { 194 | case 'table': 195 | return this._handleTable(block); 196 | case 'heading_1': 197 | return `# ${this._getRichTextContent(block)}`; 198 | case 'heading_2': 199 | return `## ${this._getRichTextContent(block)}`; 200 | case 'heading_3': 201 | return `### ${this._getRichTextContent(block)}`; 202 | case 'paragraph': 203 | return this._getRichTextContent(block); 204 | case 'to_do': 205 | return `- [${block.to_do.checked ? 'x' : ' '}] ${this._getRichTextContent(block)}`; 206 | case 'toggle': 207 | return `
<details><summary>${this._getRichTextContent(block)}</summary></details>
`; 208 | case 'code': 209 | return `\`\`\`\n${this._getRichTextContent(block)}\n\`\`\``; 210 | case 'quote': 211 | return `> ${this._getRichTextContent(block)}`; 212 | case 'callout': 213 | return `> ${this._getRichTextContent(block)}`; 214 | case 'image': 215 | return `![Image](${block.image.file.url})`; 216 | case 'video': 217 | return `[Video](${block.video.file.url})`; 218 | case 'file': 219 | return `[File](${block.file.file.url})`; 220 | case 'pdf': 221 | return `[PDF](${block.pdf.file.url})`; 222 | case 'bookmark': 223 | return `[Bookmark](${block.bookmark.url})`; 224 | case 'embed': 225 | return `[Embed](${block.embed.url})`; 226 | case 'link_preview': 227 | return `[Link Preview](${block.link_preview.url})`; 228 | case 'column_list': 229 | case 'column': 230 | return ''; // Handled recursively 231 | case 'child_page': 232 | case 'child_database': 233 | return ''; // Ignored 234 | default: 235 | console.warn('Unexpected block type:', block); 236 | return ''; 237 | } 238 | } 239 | 240 | _getRichTextContent(block: any): string { 241 | let joined_text = ''; 242 | for (let i = 0; i < block[block.type].rich_text.length; i++) { 243 | let text = block[block.type].rich_text[i]; 244 | if (text) { 245 | if (text.type === 'text' && text.text.content) { 246 | joined_text += this._applyMarkdown(text.text.content, text.annotations) + ' '; 247 | } else if (text.type === 'mention' && text.plain_text) { 248 | joined_text += this._applyMarkdown(text.plain_text, text.annotations) + ' '; 249 | } 250 | } 251 | } 252 | return joined_text.trim(); 253 | } 254 | 255 | _formatTableRows(rows: any[], tableWidth: number, hasColumnHeader: boolean, hasRowHeader: boolean): string { 256 | let markdown = ''; 257 | let headerRow = ''; 258 | 259 | rows.forEach((row, rowIndex) => { 260 | if (row.type !== 'table_row') return; 261 | 262 | let rowContent = '|'; 263 | row.table_row.cells.forEach((cell: any[], cellIndex: number) => { 264 | const cellContent = cell.map(textObj => textObj.plain_text).join(' '); 265 | rowContent += ` ${cellContent} |`; 266 | 267 | if (rowIndex === 0 && hasColumnHeader) { 268 | headerRow += '| ' + '-'.repeat(cellContent.length) + ' '; 269 | } 270 | }); 271 | 272 | markdown += rowContent + '\n'; 273 | 274 | if (rowIndex === 0 && hasColumnHeader) { 275 | markdown += headerRow + '|\n'; 276 | } 277 | }); 278 | 279 | return markdown; 280 | } 281 | 282 | async _fetchTableRows(blockId: string): Promise { 283 | try { 284 | const response = await axios.get(`https://api.notion.com/v1/blocks/${blockId}/children`, { headers: this.headers }); 285 | return response.data.results; 286 | } catch (error) { 287 | console.error('Error fetching table rows:', error); 288 | return []; 289 | } 290 | } 291 | 292 | async _handleTable(block: any): Promise { 293 | const tableWidth = block.table.table_width; 294 | const hasColumnHeader = block.table.has_column_header; 295 | const hasRowHeader = block.table.has_row_header; 296 | 297 | const rows = await this._fetchTableRows(block.id); 298 | return this._formatTableRows(rows, tableWidth, hasColumnHeader, hasRowHeader); 299 | } 300 | 301 | _applyMarkdown(content: string, annotations: any): string { 302 | if (annotations.bold) content = `**${content}**`; 303 | if (annotations.italic) content = `*${content}*`; 304 | if (annotations.strikethrough) content = `~~${content}~~`; 305 | if (annotations.underline) content = `${content}`; 306 | if (annotations.code) content = `\`${content}\``; 307 | return content; 308 | } 309 | 310 | 311 | async _fetchPageTitle(pageId: string): 
Promise { 312 | try { 313 | const response = await axios.get(`https://api.notion.com/v1/pages/${pageId}`, { headers: this.headers }); 314 | const titleProperty = response.data.properties.title; 315 | if (titleProperty && titleProperty.title && titleProperty.title.length > 0) { 316 | return titleProperty.title[0].plain_text; 317 | } 318 | return "Untitled Sub-Page"; 319 | } catch (error) { 320 | console.error('Error fetching Notion page title:', error); 321 | return "Untitled Sub-Page"; 322 | } 323 | } 324 | 325 | async addMarkdownBlock(markdownContent: string): Promise { 326 | const htmlContent = marked.parse(markdownContent); // Use marked.parse 327 | const notionBlocks = MarkdownConverter.convertHtmlToNotionBlocks(htmlContent as string); 328 | 329 | try { 330 | const response = await axios.patch(this.apiUrl, { 331 | children: notionBlocks 332 | }, { 333 | headers: this.headers 334 | }); 335 | 336 | console.log('Notion API response:', response.data); 337 | return 'Markdown content added to Notion page!'; 338 | } catch (error) { 339 | console.error('Error adding block to Notion:', error as Error); // Cast error to Error type 340 | return 'Error writing to Notion: ' + (error instanceof Error ? error.message : 'Unknown error'); // Check if error is an instance of Error 341 | } 342 | } 343 | } 344 | 345 | export { NotionAPI }; -------------------------------------------------------------------------------- /flow_watcher/notion.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from typing import Any, Dict, Optional, List, Set 3 | import json 4 | 5 | class NotionAPI: 6 | """ 7 | A class to interact with the Notion API. 8 | 9 | Attributes 10 | ---------- 11 | api_key : str 12 | The API key for authenticating with the Notion API. 13 | database_id : str 14 | The ID of the Notion database to interact with. 15 | base_url : str 16 | The base URL for the Notion API. 17 | headers : Dict[str, str] 18 | The headers to include in API requests. 19 | """ 20 | def __init__(self, api_key: str, database_id: str) -> None: 21 | """ 22 | Initialize the NotionAPI class with the provided API key and database ID. 23 | 24 | Parameters 25 | ---------- 26 | api_key : str 27 | The API key for authenticating with the Notion API. 28 | database_id : str 29 | The ID of the Notion database to interact with. 30 | """ 31 | self.api_key = api_key 32 | self.database_id = database_id 33 | self.base_url = "https://api.notion.com/v1" 34 | self.headers = { 35 | "Authorization": f"Bearer {self.api_key}", 36 | "Content-Type": "application/json", 37 | "Notion-Version": "2022-06-28" 38 | } 39 | 40 | def retrieve_database_entry(self, entry_id: str) -> Dict[str, Any]: 41 | """ 42 | Retrieve a specific entry from the Notion database. 43 | 44 | Parameters 45 | ---------- 46 | entry_id : str 47 | The ID of the entry to retrieve. 48 | 49 | Returns 50 | ------- 51 | Dict[str, Any] 52 | The JSON response from the Notion API containing the entry details. 53 | """ 54 | url = f"{self.base_url}/pages/{entry_id}" 55 | response = requests.get(url, headers=self.headers) 56 | response.raise_for_status() 57 | return response.json() 58 | 59 | def write_new_page(self, title: str, content: str) -> Dict[str, Any]: 60 | """ 61 | Create a new page in the Notion database with the given title and content. 62 | 63 | Parameters 64 | ---------- 65 | title : str 66 | The title of the new page. 67 | content : str 68 | The content of the new page. 
69 | 70 | Returns 71 | ------- 72 | Dict[str, Any] 73 | The JSON response from the Notion API containing the new page details. 74 | """ 75 | url = f"{self.base_url}/pages" 76 | data = { 77 | "parent": {"database_id": self.database_id}, 78 | "properties": { 79 | "title": { 80 | "title": [{"text": {"content": title}}] 81 | } 82 | }, 83 | "children": [ 84 | { 85 | "object": "block", 86 | "type": "paragraph", 87 | "paragraph": { 88 | "rich_text": [{"type": "text", "text": {"content": content}}] 89 | } 90 | } 91 | ] 92 | } 93 | response = requests.post(url, headers=self.headers, json=data) 94 | response.raise_for_status() 95 | return response.json() 96 | 97 | def read_page_markdown(self, page_id: str) -> str: 98 | """ 99 | Retrieve the content of a Notion page and convert it to Markdown. 100 | 101 | Parameters 102 | ---------- 103 | page_id : str 104 | The ID of the page to retrieve. 105 | 106 | Returns 107 | ------- 108 | str 109 | The content of the page in Markdown format. 110 | """ 111 | processed_pages: Set[str] = set() 112 | markdown_content = self._fetch_page_content_recursive(page_id, processed_pages) 113 | return markdown_content 114 | 115 | def _fetch_page_content_recursive(self, page_id: str, processed_pages: Set[str]) -> str: 116 | """ 117 | Recursively fetch the content of a Notion page and its children. 118 | 119 | Parameters 120 | ---------- 121 | page_id : str 122 | The ID of the page to retrieve. 123 | processed_pages : Set[str] 124 | A set of page IDs that have already been processed to avoid recursion. 125 | 126 | Returns 127 | ------- 128 | str 129 | The content of the page and its children in Markdown format. 130 | """ 131 | if page_id in processed_pages: 132 | print(f"Page {page_id} already processed. Skipping to avoid recursion.") 133 | return "" 134 | processed_pages.add(page_id) 135 | 136 | url = f"{self.base_url}/blocks/{page_id}/children" 137 | response = requests.get(url, headers=self.headers) 138 | 139 | try: 140 | response.raise_for_status() 141 | except requests.exceptions.HTTPError as e: 142 | print(f"HTTP error occurred: {e}") 143 | print(f"Response content: {response.content}") 144 | return "" 145 | 146 | blocks = response.json().get('results', []) 147 | markdown_content = self._convert_blocks_to_markdown(blocks, processed_pages) 148 | return markdown_content 149 | 150 | def _convert_blocks_to_markdown(self, blocks: List[Dict[str, Any]], processed_pages: Set[str]) -> str: 151 | """ 152 | Convert Notion blocks to Markdown formatted text. 153 | 154 | Parameters 155 | ---------- 156 | blocks : List[Dict[str, Any]] 157 | A list of Notion blocks to convert. 158 | processed_pages : Set[str] 159 | A set of page IDs that have already been processed to avoid recursion. 160 | 161 | Returns 162 | ------- 163 | str 164 | The content of the blocks in Markdown format. 
165 | """ 166 | markdown_lines = [] 167 | for block in blocks: 168 | block_type = block.get('type') 169 | if not block_type: 170 | continue 171 | 172 | content = "" 173 | if block_type == 'paragraph': 174 | content = self._handle_paragraph(block['paragraph']) 175 | elif block_type == 'heading_1': 176 | content = self._handle_heading(block['heading_1'], level=1) 177 | elif block_type == 'heading_2': 178 | content = self._handle_heading(block['heading_2'], level=2) 179 | elif block_type == 'heading_3': 180 | content = self._handle_heading(block['heading_3'], level=3) 181 | elif block_type == 'bulleted_list_item': 182 | content = self._handle_bulleted_list_item(block['bulleted_list_item']) 183 | elif block_type == 'numbered_list_item': 184 | content = self._handle_numbered_list_item(block['numbered_list_item']) 185 | elif block_type == 'to_do': 186 | content = self._handle_to_do(block['to_do']) 187 | elif block_type == 'toggle': 188 | content = self._handle_toggle(block['toggle'], processed_pages) 189 | elif block_type == 'code': 190 | content = self._handle_code(block['code']) 191 | elif block_type == 'quote': 192 | content = self._handle_quote(block['quote']) 193 | elif block_type == 'divider': 194 | content = self._handle_divider() 195 | elif block_type == 'child_page': 196 | child_page = block.get('child_page', {}) 197 | child_page_id = child_page.get('page_id') # Updated from 'id' to 'page_id' 198 | child_page_title = child_page.get('title', 'Untitled Page') 199 | if child_page_id: 200 | child_markdown = self._fetch_page_content_recursive(child_page_id, processed_pages) 201 | content = f"\n### {child_page_title}\n\n{child_markdown}\n" 202 | else: 203 | print(f"Child page ID not found in block: {block}") 204 | else: 205 | print(f"Unhandled block type: {block_type}") 206 | continue 207 | 208 | if content: 209 | markdown_lines.append(content) 210 | 211 | return "\n\n".join(markdown_lines) 212 | 213 | def _handle_paragraph(self, paragraph: Dict[str, Any]) -> str: 214 | """ 215 | Handle paragraph blocks and convert them to Markdown. 216 | 217 | Parameters 218 | ---------- 219 | paragraph : Dict[str, Any] 220 | The paragraph block to handle. 221 | 222 | Returns 223 | ------- 224 | str 225 | The content of the paragraph in Markdown format. 226 | """ 227 | texts = paragraph.get('rich_text', []) 228 | return self._compose_text(texts) 229 | 230 | def _handle_heading(self, heading: Dict[str, Any], level: int) -> str: 231 | """ 232 | Handle heading blocks and convert them to Markdown. 233 | 234 | Parameters 235 | ---------- 236 | heading : Dict[str, Any] 237 | The heading block to handle. 238 | level : int 239 | The level of the heading (1, 2, or 3). 240 | 241 | Returns 242 | ------- 243 | str 244 | The content of the heading in Markdown format. 245 | """ 246 | texts = heading.get('rich_text', []) 247 | prefix = '#' * level 248 | return f"{prefix} {self._compose_text(texts)}" 249 | 250 | def _handle_bulleted_list_item(self, list_item: Dict[str, Any]) -> str: 251 | """ 252 | Handle bulleted list item blocks and convert them to Markdown. 253 | 254 | Parameters 255 | ---------- 256 | list_item : Dict[str, Any] 257 | The bulleted list item block to handle. 258 | 259 | Returns 260 | ------- 261 | str 262 | The content of the bulleted list item in Markdown format. 
263 | """ 264 | texts = list_item.get('rich_text', []) 265 | return f"- {self._compose_text(texts)}" 266 | 267 | def _handle_numbered_list_item(self, list_item: Dict[str, Any]) -> str: 268 | """ 269 | Handle numbered list item blocks and convert them to Markdown. 270 | 271 | Parameters 272 | ---------- 273 | list_item : Dict[str, Any] 274 | The numbered list item block to handle. 275 | 276 | Returns 277 | ------- 278 | str 279 | The content of the numbered list item in Markdown format. 280 | """ 281 | texts = list_item.get('rich_text', []) 282 | return f"1. {self._compose_text(texts)}" 283 | 284 | def _handle_to_do(self, to_do: Dict[str, Any]) -> str: 285 | """ 286 | Handle to-do blocks and convert them to Markdown. 287 | 288 | Parameters 289 | ---------- 290 | to_do : Dict[str, Any] 291 | The to-do block to handle. 292 | 293 | Returns 294 | ------- 295 | str 296 | The content of the to-do block in Markdown format. 297 | """ 298 | texts = to_do.get('rich_text', []) 299 | checked = to_do.get('checked', False) 300 | checkbox = "[x]" if checked else "[ ]" 301 | return f"- {checkbox} {self._compose_text(texts)}" 302 | 303 | def _handle_toggle(self, toggle: Dict[str, Any], processed_pages: Set[str]) -> str: 304 | """ 305 | Handle toggle blocks and convert them to Markdown. 306 | 307 | Parameters 308 | ---------- 309 | toggle : Dict[str, Any] 310 | The toggle block to handle. 311 | processed_pages : Set[str] 312 | A set of page IDs that have already been processed to avoid recursion. 313 | 314 | Returns 315 | ------- 316 | str 317 | The content of the toggle block in Markdown format. 318 | """ 319 | texts = toggle.get('rich_text', []) 320 | summary = self._compose_text(texts) 321 | # Fetch children of the toggle block 322 | toggle_id = toggle.get('id') 323 | if toggle_id: 324 | url = f"{self.base_url}/blocks/{toggle_id}/children" 325 | response = requests.get(url, headers=self.headers) 326 | try: 327 | response.raise_for_status() 328 | except requests.exceptions.HTTPError as e: 329 | print(f"HTTP error occurred while fetching toggle children: {e}") 330 | return f">
331 | blocks = response.json().get('results', []) 332 | nested_markdown = self._convert_blocks_to_markdown(blocks, processed_pages) 333 | return f"> {summary}\n\n{nested_markdown}\n"
334 | else: 335 | return f"> {summary}\n\n"
336 | 337 | def _handle_code(self, code: Dict[str, Any]) -> str: 338 | """ 339 | Handle code blocks and convert them to Markdown. 340 | 341 | Parameters 342 | ---------- 343 | code : Dict[str, Any] 344 | The code block to handle. 345 | 346 | Returns 347 | ------- 348 | str 349 | The content of the code block in Markdown format. 350 | """ 351 | language = code.get('language', '') 352 | content = code.get('rich_text', []) 353 | code_content = self._compose_text(content) 354 | return f"```{language}\n{code_content}\n```" 355 | 356 | def _handle_quote(self, quote: Dict[str, Any]) -> str: 357 | """ 358 | Handle quote blocks and convert them to Markdown. 359 | 360 | Parameters 361 | ---------- 362 | quote : Dict[str, Any] 363 | The quote block to handle. 364 | 365 | Returns 366 | ------- 367 | str 368 | The content of the quote block in Markdown format. 369 | """ 370 | texts = quote.get('rich_text', []) 371 | return f"> {self._compose_text(texts)}" 372 | 373 | def _handle_divider(self) -> str: 374 | """ 375 | Handle divider blocks and convert them to Markdown. 376 | 377 | Returns 378 | ------- 379 | str 380 | The Markdown representation of a divider. 381 | """ 382 | return "---" 383 | 384 | def _compose_text(self, texts: List[Dict[str, Any]]) -> str: 385 | """ 386 | Compose rich text objects into a single string with Markdown formatting. 387 | 388 | Parameters 389 | ---------- 390 | texts : List[Dict[str, Any]] 391 | A list of rich text objects to compose. 392 | 393 | Returns 394 | ------- 395 | str 396 | The composed text in Markdown format. 397 | """ 398 | composed = "" 399 | for text in texts: 400 | if text.get('type') == 'text': 401 | content = text['text']['content'] 402 | annotations = text.get('annotations', {}) 403 | content = self._apply_markdown_annotations(content, annotations) 404 | composed += content 405 | elif text.get('type') == 'mention': 406 | # Handle mentions if needed 407 | pass 408 | # Handle other text types as needed 409 | return composed 410 | 411 | def _apply_markdown_annotations(self, content: str, annotations: Dict[str, Any]) -> str: 412 | """ 413 | Apply Markdown formatting based on text annotations. 414 | 415 | Parameters 416 | ---------- 417 | content : str 418 | The text content to format. 419 | annotations : Dict[str, Any] 420 | The annotations to apply. 421 | 422 | Returns 423 | ------- 424 | str 425 | The formatted text. 426 | """ 427 | if annotations.get('bold'): 428 | content = f"**{content}**" 429 | if annotations.get('italic'): 430 | content = f"*{content}*" 431 | if annotations.get('underline'): 432 | content = f"{content}" # Markdown has no native underline syntax, so the text is left unchanged 433 | if annotations.get('strikethrough'): 434 | content = f"~~{content}~~" 435 | if annotations.get('code'): 436 | content = f"`{content}`" 437 | return content 438 | 439 | def write_markdown_to_page(self, page_id: str, markdown_content: str) -> Dict[str, Any]: 440 | """ 441 | Write Markdown content to a Notion page by converting it to Notion blocks. 442 | 443 | Parameters 444 | ---------- 445 | page_id : str 446 | The ID of the page to write to. 447 | markdown_content : str 448 | The Markdown content to write. 449 | 450 | Returns 451 | ------- 452 | Dict[str, Any] 453 | The JSON response from the Notion API containing the updated page details.
454 | """ 455 | blocks = self._convert_markdown_to_blocks(markdown_content) 456 | url = f"{self.base_url}/blocks/{page_id}/children" 457 | data = { 458 | "children": blocks 459 | } 460 | response = requests.patch(url, headers=self.headers, json=data) 461 | response.raise_for_status() 462 | return response.json() 463 | 464 | def _convert_markdown_to_blocks(self, markdown: str) -> List[Dict[str, Any]]: 465 | """ 466 | Convert Markdown text to Notion blocks. 467 | 468 | Parameters 469 | ---------- 470 | markdown : str 471 | The Markdown text to convert. 472 | 473 | Returns 474 | ------- 475 | List[Dict[str, Any]] 476 | A list of Notion blocks representing the Markdown content. 477 | """ 478 | lines = markdown.split('\n') 479 | blocks = [] 480 | for line in lines: 481 | if line.startswith('### '): 482 | blocks.append({ 483 | "object": "block", 484 | "type": "heading_3", 485 | "heading_3": { 486 | "rich_text": [{"type": "text", "text": {"content": line[4:]}}] 487 | } 488 | }) 489 | elif line.startswith('## '): 490 | blocks.append({ 491 | "object": "block", 492 | "type": "heading_2", 493 | "heading_2": { 494 | "rich_text": [{"type": "text", "text": {"content": line[3:]}}] 495 | } 496 | }) 497 | elif line.startswith('# '): 498 | blocks.append({ 499 | "object": "block", 500 | "type": "heading_1", 501 | "heading_1": { 502 | "rich_text": [{"type": "text", "text": {"content": line[2:]}}] 503 | } 504 | }) 505 | elif line.startswith('- [x] '): 506 | blocks.append({ 507 | "object": "block", 508 | "type": "to_do", 509 | "to_do": { 510 | "rich_text": [{"type": "text", "text": {"content": line[6:]}}], # the API expects 'rich_text' here, matching the other block types 511 | "checked": True 512 | } 513 | }) 514 | elif line.startswith('- [ ] '): 515 | blocks.append({ 516 | "object": "block", 517 | "type": "to_do", 518 | "to_do": { 519 | "rich_text": [{"type": "text", "text": {"content": line[6:]}}], 520 | "checked": False 521 | } 522 | }) 523 | elif line.startswith('- '): 524 | blocks.append({ 525 | "object": "block", 526 | "type": "bulleted_list_item", 527 | "bulleted_list_item": { 528 | "rich_text": [{"type": "text", "text": {"content": line[2:]}}] 529 | } 530 | }) 531 | elif line.startswith('1. '): 532 | blocks.append({ 533 | "object": "block", 534 | "type": "numbered_list_item", 535 | "numbered_list_item": { 536 | "rich_text": [{"type": "text", "text": {"content": line[3:]}}] 537 | } 538 | }) 539 | elif line.startswith('```'): 540 | language = line[3:].strip() # only the fence line is converted; the code body falls through to the paragraph branch below 541 | blocks.append({ 542 | "object": "block", 543 | "type": "code", 544 | "code": { 545 | "rich_text": [{"type": "text", "text": {"content": ""}}], 546 | "language": language or "plain text" # Notion requires a value from its supported language list 547 | } 548 | }) 549 | elif line.startswith('---') or line.startswith('***'): 550 | blocks.append({ 551 | "object": "block", 552 | "type": "divider", 553 | "divider": {} 554 | }) 555 | elif line.startswith('> '): 556 | blocks.append({ 557 | "object": "block", 558 | "type": "quote", 559 | "quote": { 560 | "rich_text": [{"type": "text", "text": {"content": line[2:]}}] 561 | } 562 | }) 563 | elif line.startswith('###'): 564 | # Handle details summary or other extended markdown syntax if needed 565 | blocks.append({ 566 | "object": "block", 567 | "type": "toggle", 568 | "toggle": { 569 | "rich_text": [{"type": "text", "text": {"content": line}}] 570 | } 571 | }) 572 | else: 573 | blocks.append({ 574 | "object": "block", 575 | "type": "paragraph", 576 | "paragraph": { 577 | "rich_text": [{"type": "text", "text": {"content": line}}] 578 | } 579 | }) 580 | return blocks --------------------------------------------------------------------------------
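Below is a minimal usage sketch of the Markdown round trip implemented in `flow_watcher/notion.py` above. It assumes the `NotionAPI` constructor takes a Notion API key and a database ID, as in `sandbox/notion_read.py`, and that `read_page_markdown()` and `write_markdown_to_page()` behave as defined in the file; the database and page IDs are placeholders, not values from the repository.

```python
# Sketch only: round-trip a Notion page through Markdown with flow_watcher.notion.
# Assumes NotionAPI(api_key, database_id), read_page_markdown(page_id) and
# write_markdown_to_page(page_id, markdown) as defined above, plus an
# auth/auth.yaml file containing a notion_api_key entry. All IDs are placeholders.
import yaml

from flow_watcher.notion import NotionAPI

with open('auth/auth.yaml', 'r') as auth_file:
    auth_config = yaml.safe_load(auth_file)

notion_api = NotionAPI(auth_config['notion_api_key'], 'YOUR_DATABASE_ID')

# Export a page (headings, lists, to-dos, toggles, code, quotes, dividers) to Markdown ...
markdown_content = notion_api.read_page_markdown('SOURCE_PAGE_ID')

# ... then convert that Markdown back into Notion blocks and append it to another page.
notion_api.write_markdown_to_page('TARGET_PAGE_ID', markdown_content)
```

Note that `_convert_markdown_to_blocks()` parses one line at a time, so nested structures and fenced code content are flattened on the write path; the round trip is lossy for those block types.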