├── tests ├── __init__.py ├── mock_data.py ├── test_unit.py └── test_integration.py ├── .gitignore ├── .github └── workflows │ └── test.yml ├── LICENSE ├── action.yml ├── README.md └── dbt_docs_to_notion.py /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | __pycache__ 3 | /tests/__pycache__ -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Run Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | env: 13 | DATABASE_NAME: 'mock_database_name' 14 | DATABASE_PARENT_ID: 'mock_database_parent_id' 15 | NOTION_TOKEN: 'mock_notion_token' 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python 3.12 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: '3.12' 22 | - name: Install dependencies 23 | run: | 24 | pip install requests 25 | pip install mock 26 | - name: Run unit tests 27 | run: | 28 | python -m unittest tests/test_unit.py 29 | - name: Run integration tests 30 | run: | 31 | python -m unittest tests/test_integration.py 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Rob Dearborn 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/mock_data.py: -------------------------------------------------------------------------------- 1 | # Mock Data for dbt and Notion API 2 | import os 3 | 4 | # Mock dbt Data 5 | DBT_MOCK_CATALOG = { 6 | "nodes": { 7 | "model.test.model_1": { 8 | "columns": { 9 | "column_1": { 10 | "type": "TEXT" 11 | }, 12 | "column_2": { 13 | "type": "TEXT" 14 | }, 15 | }, 16 | "metadata": { 17 | "owner": "owner@example.com" 18 | }, 19 | "stats": { 20 | "row_count": { 21 | "value": 1, 22 | }, 23 | "bytes": { 24 | "value": 1000000, 25 | }, 26 | }, 27 | }, 28 | }, 29 | } 30 | 31 | DBT_MOCK_MANIFEST = { 32 | "nodes": { 33 | "model.test.model_1": { 34 | "resource_type": "model", 35 | "columns": { 36 | "column_1": { 37 | "description": "Description for column 1" 38 | }, 39 | "column_2": { 40 | "description": "Description for column 2" 41 | }, 42 | }, 43 | "raw_code": "SELECT 1", 44 | "compiled_code": "SELECT 1", 45 | "name": "model_1", 46 | "description": "Description for model 1", 47 | "relation_name": "model.test.model_1", 48 | "depends_on": ["model.test.model_2"], 49 | "tags": ["tag1", "tag2"], 50 | }, 51 | }, 52 | } 53 | 54 | # Mock Notion API Responses 55 | NOTION_MOCK_EXISTENT_CHILD_PAGE_QUERY = { 56 | "results": [ 57 | { 58 | "id": "mock_child_id", 59 | "child_database": { 60 | "title": os.environ['DATABASE_NAME'], 61 | }, 62 | }, 63 | ], 64 | } 65 | 66 | NOTION_MOCK_EXISTENT_DATABASE_RECORDS_QUERY = { 67 | "results": [ 68 | { 69 | "id": "mock_record_id", 70 | }, 71 | ], 72 | } 73 | 74 | NOTION_MOCK_NONEXISTENT_QUERY = { 75 | "results": [], 76 | } 77 | 78 | NOTION_MOCK_DATABASE_CREATE = { 79 | "id": "mock_database_id", 80 | } 81 | 82 | NOTION_MOCK_RECORD_CREATE = { 83 | "id": "mock_record_id", 84 | } 85 | -------------------------------------------------------------------------------- /action.yml: -------------------------------------------------------------------------------- 1 | name: 'dbt-docs-to-notion' 2 | description: 'Exports dbt model docs to a Notion database' 3 | branding: 4 | icon: 'book-open' 5 | color: 'orange' 6 | inputs: 7 | dbt-package: 8 | description: 'dbt-bigquery, dbt-postgres, dbt-bigquery==1.0.0, etc.' 9 | required: true 10 | dbt-profile-path: 11 | description: 'where profile.yml lives' 12 | required: false 13 | default: './' 14 | dbt-target: 15 | description: 'profile target to use for dbt docs generation' 16 | required: true 17 | model-records-to-write: 18 | description: \"all\" or \"model_name_1 model_name_2 ...\" 19 | required: false 20 | default: "all" 21 | notion-database-name: 22 | description: 'what to name the Notion database of dbt models' 23 | required: true 24 | notion-parent-id: 25 | description: 'Notion page where database of dbt models will be added' 26 | required: true 27 | notion-token: 28 | description: 'Notion token api for integration to use (pass using secrets)' 29 | required: true 30 | runs: 31 | using: 'composite' 32 | steps: 33 | - name: Install Python 34 | uses: "actions/setup-python@v4" 35 | with: 36 | python-version: "3.12" 37 | - name: Install dbt 38 | run: "pip3 install ${{ inputs.dbt-package }}" 39 | shell: bash 40 | - name: Load dbt deps 41 | run: "dbt deps --profiles-dir ${{ inputs.dbt-profile-path }} --target=${{ inputs.dbt-target }}" 42 | shell: bash 43 | - name: Generate dbt Docs 44 | run: "dbt docs generate --profiles-dir ${{ inputs.dbt-profile-path }} --target=${{ inputs.dbt-target }}" 45 | shell: bash 46 | - name: Export dbt Docs to Notion 47 | run: "python3 ${{ github.action_path }}/dbt_docs_to_notion.py ${{ inputs.model-records-to-write }}" 48 | shell: bash 49 | env: 50 | DATABASE_NAME: ${{ inputs.notion-database-name }} 51 | DATABASE_PARENT_ID: ${{ inputs.notion-parent-id }} 52 | NOTION_TOKEN: ${{ inputs.notion-token }} 53 | -------------------------------------------------------------------------------- /tests/test_unit.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import patch, Mock 3 | 4 | from dbt_docs_to_notion import make_request, get_paths_or_empty, get_owner 5 | from tests.mock_data import DBT_MOCK_MANIFEST, DBT_MOCK_CATALOG, NOTION_MOCK_DATABASE_CREATE 6 | 7 | 8 | class TestMakeRequest(unittest.TestCase): 9 | @patch('dbt_docs_to_notion.requests.request') 10 | def test_valid_request(self, mock_request): 11 | mock_request.return_value = Mock(status_code=200, json=lambda: NOTION_MOCK_DATABASE_CREATE) 12 | response = make_request("some_endpoint") 13 | self.assertEqual(response, NOTION_MOCK_DATABASE_CREATE) 14 | 15 | @patch('dbt_docs_to_notion.requests.request') 16 | def test_invalid_token(self, mock_request): 17 | mock_request.return_value = Mock(status_code=403, json=lambda: {"message": "Invalid token"}) 18 | with self.assertRaises(Exception) as context: 19 | make_request("some_endpoint") 20 | self.assertIn("Request returned status code 403", str(context.exception)) 21 | 22 | @patch('dbt_docs_to_notion.requests.request') 23 | def test_error_response(self, mock_request): 24 | mock_request.return_value = Mock(status_code=500, json=lambda: {"message": "Server error"}) 25 | with self.assertRaises(Exception) as context: 26 | make_request("some_endpoint") 27 | self.assertIn("Request returned status code 500", str(context.exception)) 28 | 29 | 30 | class TestGetPathsOrEmpty(unittest.TestCase): 31 | def test_valid_path(self): 32 | result = get_paths_or_empty(DBT_MOCK_MANIFEST["nodes"]["model.test.model_1"], [["description"]]) 33 | self.assertEqual(result, "Description for model 1") 34 | 35 | def test_invalid_path(self): 36 | result = get_paths_or_empty(DBT_MOCK_MANIFEST["nodes"]["model.test.model_1"], [["invalid_path"]]) 37 | self.assertEqual(result, '') 38 | 39 | 40 | class TestGetOwner(unittest.TestCase): 41 | def test_owner_in_config(self): 42 | data = DBT_MOCK_MANIFEST["nodes"]["model.test.model_1"] 43 | catalog_nodes = DBT_MOCK_CATALOG["nodes"] 44 | result = get_owner(data, catalog_nodes, "model.test.model_1") 45 | self.assertEqual(result, "owner@example.com") 46 | 47 | 48 | if __name__ == '__main__': 49 | unittest.main() 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dbt-docs-to-notion 2 | 3 | A Github action for exporting dbt docs to a Notion database, where they can be conveniently consumed (especially by casual users in your org). 4 | 5 | ## Output 6 | 7 | A Notion database, within a parent page of your choosing, with records like this for each model that contain the same information as dbt docs: 8 | ![dbt docs to notion output](https://i.imgur.com/Y1EWj9l.png) 9 | 10 | ## Usage 11 | 12 | ### Prerequisites 13 | 14 | In advance of using this action, you should: 15 | 16 | 1. [Create a new integration within your Notion workspace](https://www.notion.so/my-integrations) 17 | 2. Create a parent Notion page for the docs database and share it with the integration from above 18 | 3. Have your Notion integration token and a working dbt `profiles.yml` accessible to your repo (I'd recommend using [Github's repository secrets](https://docs.github.com/en/actions/security-guides/encrypted-secrets); see example workflow below). 19 | 20 | Ideally you should also write descriptions for models and columns as is a [best practice](https://docs.getdbt.com/docs/building-a-dbt-project/documentation#adding-descriptions-to-your-project). 21 | 22 | > ❗️ Note: this program assumes schema-defined model and column names to be entirely lowercase. 23 | 24 | ### Inputs 25 | 26 | - `dbt-package`: dbt-bigquery, dbt-postgres, dbt-bigquery==1.0.0, etc. (**required**) 27 | - `dbt-profile-path`: where profile.yml lives (default: `./`) 28 | - `dbt-target`: profile target to use for dbt docs generation (**required**) 29 | - `model-records-to-write`: "all" or "model_name_1 model_name_2 ..." (default: "all") 30 | - `notion-database-name`: what to name the Notion database of dbt models (**required**) 31 | - `notion-parent-id`: Notion page where database of dbt models will be added (**required**) 32 | - `notion-token`: Notion token API for integration to use (pass using secrets) (**required**) 33 | 34 | ### Post-initialization Touchups 35 | 36 | Unfortunately, Notion's API doesn't allow for setting the order of properties or records in a database. Thus, after creating your database, you'll probably want to do some re-arranging (I'd recommend adding a table view to your database's parent page). 37 | 38 | ### Example workflow 39 | 40 | ```yaml 41 | name: dbt Docs to Notion 42 | 43 | on: 44 | push: 45 | branches: 46 | - master 47 | 48 | jobs: 49 | dbt-docs-to-notion: 50 | runs-on: ubuntu-latest 51 | steps: 52 | - name: Checkout 53 | uses: actions/checkout@v3 54 | - name: Create temp dbt profiles 55 | run: "printf %s \"$DBT_PROFILES\" > ./profiles.yml" 56 | env: 57 | DBT_PROFILES: ${{ secrets.DBT_PROFILES }} 58 | - name: dbt-docs-to-notion 59 | uses: rfdearborn/dbt-docs-to-notion@v1.0.9 60 | with: 61 | dbt-package: 'dbt-bigquery==1.0.0' 62 | dbt-profile-path: './' 63 | dbt-target: 'github_actions' 64 | model-records-to-write: "all" 65 | notion-database-name: 'dbt Models' 66 | notion-parent-id: '604ece5b9dca4cdda449abeabef759e8' 67 | notion-token: '${{ secrets.DBT_DOCS_TO_NOTION_TOKEN }}' 68 | ``` 69 | 70 | ## Todo 71 | 72 | - Visualize models graph 73 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import unittest 4 | from unittest.mock import patch, Mock 5 | 6 | from dbt_docs_to_notion import main 7 | from tests.mock_data import ( 8 | DBT_MOCK_MANIFEST, 9 | DBT_MOCK_CATALOG, 10 | NOTION_MOCK_EXISTENT_CHILD_PAGE_QUERY, 11 | NOTION_MOCK_EXISTENT_DATABASE_RECORDS_QUERY, 12 | NOTION_MOCK_NONEXISTENT_QUERY, 13 | NOTION_MOCK_DATABASE_CREATE, 14 | NOTION_MOCK_RECORD_CREATE, 15 | ) 16 | 17 | 18 | class TestDbtDocsToNotionIntegration(unittest.TestCase): 19 | 20 | def setUp(self): 21 | patch('dbt_docs_to_notion.json.load').start().side_effect = [DBT_MOCK_MANIFEST, DBT_MOCK_CATALOG] 22 | patch('dbt_docs_to_notion.open', new_callable=unittest.mock.mock_open, read_data="data").start() 23 | self.comparison_catalog = DBT_MOCK_CATALOG['nodes']['model.test.model_1'] 24 | self.comparison_manifest = DBT_MOCK_MANIFEST['nodes']['model.test.model_1'] 25 | self.recorded_requests = [] 26 | 27 | def tearDown(self): 28 | patch.stopall() 29 | 30 | def _verify_database_obj(self, database_obj): 31 | title = database_obj['title'][0] 32 | self.assertEqual(title['type'], 'text') 33 | self.assertEqual(title['text']['content'], os.environ['DATABASE_NAME']) 34 | parent = database_obj['parent'] 35 | self.assertEqual(parent['type'], 'page_id') 36 | self.assertEqual(parent['page_id'], os.environ['DATABASE_PARENT_ID']) 37 | properties = database_obj['properties'] 38 | self.assertEqual(properties['Name'], {'title': {}}) 39 | self.assertEqual(properties['Description'], {'rich_text': {}}) 40 | self.assertEqual(properties['Owner'], {'rich_text': {}}) 41 | self.assertEqual(properties['Relation'], {'rich_text': {}}) 42 | self.assertEqual( 43 | properties['Approx Rows'], 44 | {'number': {'format': 'number_with_commas'}} 45 | ) 46 | self.assertEqual( 47 | properties['Approx GB'], 48 | {'number': {'format': 'number_with_commas'}} 49 | ) 50 | self.assertEqual(properties['Depends On'], {'rich_text': {}}) 51 | self.assertEqual(properties['Tags'], {'rich_text': {}}) 52 | 53 | def _verify_record_obj(self, record_obj): 54 | parent = record_obj['parent'] 55 | self.assertEqual(parent['database_id'], NOTION_MOCK_DATABASE_CREATE['id']) 56 | properties = record_obj['properties'] 57 | self.assertEqual(properties['Name']['title'][0]['text']['content'], self.comparison_manifest['name']) 58 | self.assertEqual(properties['Description']['rich_text'][0]['text']['content'], self.comparison_manifest['description']) 59 | self.assertEqual(properties['Owner']['rich_text'][0]['text']['content'], self.comparison_catalog['metadata']['owner']) 60 | self.assertEqual(properties['Relation']['rich_text'][0]['text']['content'], self.comparison_manifest['relation_name']) 61 | self.assertEqual(properties['Approx Rows']['number'], self.comparison_catalog['stats']['row_count']['value']) 62 | self.assertEqual(properties['Approx GB']['number'], self.comparison_catalog['stats']['bytes']['value']/1e9) 63 | self.assertEqual(properties['Depends On']['rich_text'][0]['text']['content'], json.dumps(self.comparison_manifest['depends_on'])) 64 | self.assertEqual(properties['Tags']['rich_text'][0]['text']['content'], json.dumps(self.comparison_manifest['tags'])) 65 | 66 | def _verify_record_children_obj(self, record_children_obj): 67 | toc_child_block = record_children_obj[0] 68 | self.assertEqual(toc_child_block['object'], 'block') 69 | self.assertEqual(toc_child_block['type'], 'table_of_contents') 70 | columns_header_child_block = record_children_obj[1] 71 | self.assertEqual(columns_header_child_block['object'], 'block') 72 | self.assertEqual(columns_header_child_block['type'], 'heading_1') 73 | self.assertEqual(columns_header_child_block['heading_1']['rich_text'][0]['text']['content'], 'Columns') 74 | columns_child_block = record_children_obj[2] 75 | self.assertEqual(columns_child_block['object'], 'block') 76 | self.assertEqual(columns_child_block['type'], 'table') 77 | self.assertEqual(columns_child_block['table']['table_width'], 3) 78 | self.assertEqual(columns_child_block['table']['has_column_header'], True) 79 | self.assertEqual(columns_child_block['table']['has_row_header'], False) 80 | columns_table_children_obj = columns_child_block['table']['children'] 81 | columns_table_header_row = columns_table_children_obj[0] 82 | self.assertEqual(columns_table_header_row['type'], 'table_row') 83 | self.assertEqual(columns_table_header_row['table_row']['cells'][0][0]['plain_text'], 'Column') 84 | self.assertEqual(columns_table_header_row['table_row']['cells'][1][0]['plain_text'], 'Type') 85 | self.assertEqual(columns_table_header_row['table_row']['cells'][2][0]['plain_text'], 'Description') 86 | columns_table_row = columns_table_children_obj[1] 87 | self.assertEqual(columns_table_row['type'], 'table_row') 88 | self.assertEqual(columns_table_row['table_row']['cells'][0][0]['plain_text'], list(self.comparison_catalog['columns'].keys())[0]) 89 | self.assertEqual(columns_table_row['table_row']['cells'][1][0]['plain_text'], list(self.comparison_catalog['columns'].values())[0]['type']) 90 | self.assertEqual(columns_table_row['table_row']['cells'][2][0]['plain_text'], list(self.comparison_manifest['columns'].values())[0]['description']) 91 | raw_code_header_child_block = record_children_obj[3] 92 | self.assertEqual(raw_code_header_child_block['object'], 'block') 93 | self.assertEqual(raw_code_header_child_block['type'], 'heading_1') 94 | self.assertEqual(raw_code_header_child_block['heading_1']['rich_text'][0]['text']['content'], 'Raw Code') 95 | raw_code_child_block = record_children_obj[4] 96 | self.assertEqual(raw_code_child_block['object'], 'block') 97 | self.assertEqual(raw_code_child_block['type'], 'code') 98 | self.assertEqual(raw_code_child_block['code']['language'], 'sql') 99 | self.assertEqual(raw_code_child_block['code']['rich_text'][0]['text']['content'], self.comparison_manifest['raw_code']) 100 | compiled_code_header_child_block = record_children_obj[5] 101 | self.assertEqual(compiled_code_header_child_block['object'], 'block') 102 | self.assertEqual(compiled_code_header_child_block['type'], 'heading_1') 103 | self.assertEqual(compiled_code_header_child_block['heading_1']['rich_text'][0]['text']['content'], 'Compiled Code') 104 | compiled_code_child_block = record_children_obj[6] 105 | self.assertEqual(compiled_code_child_block['object'], 'block') 106 | self.assertEqual(compiled_code_child_block['type'], 'code') 107 | self.assertEqual(compiled_code_child_block['code']['language'], 'sql') 108 | self.assertEqual(compiled_code_child_block['code']['rich_text'][0]['text']['content'], self.comparison_manifest['compiled_code']) 109 | 110 | @patch('dbt_docs_to_notion.make_request') 111 | def test_create_new_database(self, mock_make_request): 112 | def _mocked_make_request(endpoint, querystring, method, **request_kwargs): 113 | self.recorded_requests.append((endpoint, method)) 114 | if endpoint == 'blocks/' and method == 'GET': 115 | return NOTION_MOCK_NONEXISTENT_QUERY 116 | elif endpoint == 'databases/' and querystring == '' and method == 'POST': 117 | database_obj = request_kwargs['json'] 118 | self._verify_database_obj(database_obj) 119 | return NOTION_MOCK_DATABASE_CREATE 120 | elif endpoint == 'databases/' and '/query' in querystring and method == 'POST': 121 | return NOTION_MOCK_NONEXISTENT_QUERY 122 | elif endpoint == 'pages/' and method == 'POST': 123 | record_obj = request_kwargs['json'] 124 | self._verify_record_obj(record_obj) 125 | record_children_obj = request_kwargs['json']['children'] 126 | self._verify_record_children_obj(record_children_obj) 127 | return NOTION_MOCK_RECORD_CREATE 128 | mock_make_request.side_effect = _mocked_make_request 129 | 130 | main(argv=[None, 'all']) 131 | 132 | self.assertEqual( 133 | self.recorded_requests, 134 | [ 135 | ('blocks/', 'GET'), 136 | ('databases/', 'POST'), 137 | ('databases/', 'POST'), 138 | ('pages/', 'POST'), 139 | ] 140 | ) 141 | 142 | @patch('dbt_docs_to_notion.make_request') 143 | def test_update_existing_database(self, mock_make_request): 144 | def _mocked_make_request(endpoint, querystring, method, **request_kwargs): 145 | self.recorded_requests.append((endpoint, method)) 146 | if endpoint == 'blocks/' and method == 'GET': 147 | return NOTION_MOCK_EXISTENT_CHILD_PAGE_QUERY 148 | elif endpoint == 'databases/' and '/query' in querystring and method == 'POST': 149 | return NOTION_MOCK_EXISTENT_DATABASE_RECORDS_QUERY 150 | elif endpoint == 'pages/' and method == 'PATCH': 151 | record_obj = request_kwargs['json'] 152 | self._verify_record_obj(record_obj) 153 | return {} # response is thrown away 154 | elif endpoint == 'blocks/' and method == 'DELETE': 155 | return {} # response is thrown away 156 | elif endpoint == 'blocks/' and method == 'PATCH': 157 | record_children_obj = request_kwargs['json']['children'] 158 | self._verify_record_children_obj(record_children_obj) 159 | return {} # response is thrown away 160 | mock_make_request.side_effect = _mocked_make_request 161 | 162 | main(argv=[None, 'all']) 163 | 164 | self.assertEqual( 165 | self.recorded_requests, 166 | [ 167 | ('blocks/', 'GET'), 168 | ('databases/', 'POST'), 169 | ('pages/mock_record_id', 'PATCH'), 170 | ('blocks/', 'GET'), 171 | ('blocks/', 'DELETE'), 172 | ('blocks/', 'PATCH'), 173 | ] 174 | ) 175 | 176 | 177 | if __name__ == '__main__': 178 | unittest.main() 179 | -------------------------------------------------------------------------------- /dbt_docs_to_notion.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import time 5 | 6 | import requests 7 | 8 | 9 | DATABASE_PARENT_ID = os.environ['DATABASE_PARENT_ID'] 10 | DATABASE_NAME = os.environ['DATABASE_NAME'] 11 | NOTION_TOKEN = os.environ['NOTION_TOKEN'] 12 | NUMERIC_ZERO_VALUE = -1 13 | 14 | 15 | def make_request(endpoint, querystring='', method='GET', **request_kwargs): 16 | time.sleep(0.34) # notion api limit is 3 requests per second 17 | 18 | headers = { 19 | 'Authorization': NOTION_TOKEN, 20 | 'Content-Type': 'application/json', 21 | 'Notion-Version': '2022-02-22' 22 | } 23 | url = f'https://api.notion.com/v1/{endpoint}{querystring}' 24 | resp = requests.request(method, url, headers=headers, **request_kwargs) 25 | 26 | if not resp.status_code == 200: 27 | raise Exception( 28 | f"Request returned status code {resp.status_code}\nResponse text: {resp.text}" 29 | ) 30 | 31 | return resp.json() 32 | 33 | 34 | def get_paths_or_empty(parent_object, paths_array, zero_value=''): 35 | """Used for catalog_nodes accesses, since structure is variable""" 36 | for path in paths_array: 37 | obj = parent_object 38 | for el in path: 39 | if el not in obj: 40 | obj = zero_value 41 | break 42 | obj = obj[el] 43 | if obj != zero_value: 44 | return obj 45 | 46 | return zero_value 47 | 48 | 49 | def get_owner(data, catalog_nodes, model_name): 50 | """ 51 | Check for an owner field explicitly named in the DBT Config 52 | If none present, fall back to database table owner 53 | """ 54 | owner = get_paths_or_empty(data, [['config', 'meta', 'owner']], None) 55 | if owner is not None: 56 | return owner 57 | 58 | return get_paths_or_empty(catalog_nodes, [[model_name, 'metadata', 'owner']], '') 59 | 60 | 61 | def main(argv=None): 62 | if argv is None: 63 | argv = sys.argv 64 | model_records_to_write = argv[1:] # 'all' or list of model names 65 | print(f'Model records to write: {model_records_to_write}') 66 | 67 | ###### load nodes from dbt docs ###### 68 | with open('target/manifest.json', encoding='utf-8') as f: 69 | manifest = json.load(f) 70 | manifest_nodes = manifest['nodes'] 71 | 72 | with open('target/catalog.json', encoding='utf-8') as f: 73 | catalog = json.load(f) 74 | catalog_nodes = catalog['nodes'] 75 | 76 | models = {node_name: data 77 | for (node_name, data) 78 | in manifest_nodes.items() if data['resource_type'] == 'model'} 79 | 80 | ###### create database if not exists ###### 81 | children_query_resp = make_request( 82 | endpoint='blocks/', 83 | querystring=f'{DATABASE_PARENT_ID}/children', 84 | method='GET' 85 | ) 86 | 87 | database_id = '' 88 | for child in children_query_resp['results']: 89 | if('child_database' in child 90 | and child['child_database'] == {'title': DATABASE_NAME}): 91 | database_id = child['id'] 92 | break 93 | 94 | if database_id: 95 | print(f'database {database_id} already exists, proceeding to update records!') 96 | else: 97 | database_obj = { 98 | "title": [ 99 | { 100 | "type": "text", 101 | "text": { 102 | "content": DATABASE_NAME, 103 | "link": None 104 | } 105 | } 106 | ], 107 | "parent": { 108 | "type": "page_id", 109 | "page_id": DATABASE_PARENT_ID 110 | }, 111 | "properties": { 112 | "Name": { 113 | "title": {} 114 | }, 115 | "Description": { 116 | "rich_text": {} 117 | }, 118 | "Owner": { 119 | "rich_text": {} 120 | }, 121 | "Relation": { 122 | "rich_text": {} 123 | }, 124 | "Approx Rows": { 125 | "number": { 126 | "format": "number_with_commas" 127 | } 128 | }, 129 | "Approx GB": { 130 | "number": { 131 | "format": "number_with_commas" 132 | } 133 | }, 134 | "Depends On": { 135 | "rich_text": {} 136 | }, 137 | "Tags": { 138 | "rich_text": {} 139 | } 140 | } 141 | } 142 | 143 | print('creating database') 144 | database_creation_resp = make_request( 145 | endpoint='databases/', 146 | querystring='', 147 | method='POST', 148 | json=database_obj 149 | ) 150 | database_id = database_creation_resp['id'] 151 | print(f'\ncreated database {database_id}, proceeding to create records!') 152 | 153 | ##### create / update database records ##### 154 | for model_name, data in sorted(list(models.items()), reverse=True): 155 | if model_records_to_write == ['all'] or model_name.split(".")[-1] in model_records_to_write: 156 | # form record object 157 | column_descriptions = {name: metadata['description'] 158 | for name, metadata 159 | in data['columns'].items()} 160 | 161 | columns_table_children_obj = [ 162 | { 163 | "type": "table_row", 164 | "table_row": { 165 | "cells": [ 166 | [ 167 | { 168 | "type": "text", 169 | "text": { 170 | "content": "Column" 171 | }, 172 | "plain_text": "Column" 173 | } 174 | ], 175 | [ 176 | { 177 | "type": "text", 178 | "text": { 179 | "content": "Type" 180 | }, 181 | "plain_text": "Type" 182 | } 183 | ], 184 | [ 185 | { 186 | "type": "text", 187 | "text": { 188 | "content": "Description" 189 | }, 190 | "plain_text": "Description" 191 | } 192 | ] 193 | ] 194 | } 195 | } 196 | ] 197 | col_names_and_data = list(get_paths_or_empty( 198 | catalog_nodes, 199 | [[model_name, 'columns']], 200 | {} 201 | ).items()) 202 | for (col_name, col_data) in col_names_and_data[:98]: # notion api limit is 100 table rows 203 | columns_table_children_obj.append( 204 | { 205 | "type": "table_row", 206 | "table_row": { 207 | "cells": [ 208 | [ 209 | { 210 | "type": "text", 211 | "text": { 212 | "content": col_name 213 | }, 214 | "plain_text": col_name 215 | } 216 | ], 217 | [ 218 | { 219 | "type": "text", 220 | "text": { 221 | "content": col_data['type'] 222 | }, 223 | "plain_text": col_data['type'] 224 | } 225 | ], 226 | [ 227 | { 228 | "type": "text", 229 | "text": { 230 | "content": ( 231 | column_descriptions[col_name.lower()] 232 | if col_name.lower() in column_descriptions 233 | else '' 234 | ) 235 | }, 236 | "plain_text": ( 237 | column_descriptions[col_name.lower()] 238 | if col_name.lower() in column_descriptions 239 | else '' 240 | ) 241 | } 242 | ] 243 | ] 244 | } 245 | } 246 | ) 247 | if len(col_names_and_data) > 98: 248 | # make that columns have been truncated 249 | columns_table_children_obj.append( 250 | { 251 | "type": "table_row", 252 | "table_row": { 253 | "cells": [ 254 | [ 255 | { 256 | "type": "text", 257 | "text": { 258 | "content": "..." 259 | }, 260 | "plain_text": "..." 261 | } 262 | ], 263 | [ 264 | { 265 | "type": "text", 266 | "text": { 267 | "content": "..." 268 | }, 269 | "plain_text": "..." 270 | } 271 | ], 272 | [ 273 | { 274 | "type": "text", 275 | "text": { 276 | "content": "..." 277 | }, 278 | "plain_text": "..." 279 | } 280 | ] 281 | ] 282 | } 283 | } 284 | ) 285 | 286 | record_children_obj = [ 287 | # Table of contents 288 | { 289 | "object": "block", 290 | "type": "table_of_contents", 291 | "table_of_contents": { 292 | "color": "default" 293 | } 294 | }, 295 | # Columns 296 | { 297 | "object": "block", 298 | "type": "heading_1", 299 | "heading_1": { 300 | "rich_text": [ 301 | { 302 | "type": "text", 303 | "text": { "content": "Columns" } 304 | } 305 | ] 306 | } 307 | }, 308 | { 309 | "object": "block", 310 | "type": "table", 311 | "table": { 312 | "table_width": 3, 313 | "has_column_header": True, 314 | "has_row_header": False, 315 | "children": columns_table_children_obj 316 | } 317 | }, 318 | # Raw Code 319 | { 320 | "object": "block", 321 | "type": "heading_1", 322 | "heading_1": { 323 | "rich_text": [ 324 | { 325 | "type": "text", 326 | "text": { "content": "Raw Code" } 327 | } 328 | ] 329 | } 330 | }, 331 | { 332 | "object": "block", 333 | "type": "code", 334 | "code": { 335 | "rich_text": [ 336 | { 337 | "type": "text", 338 | "text": { 339 | "content": data['raw_code'][:2000] if 'raw_code' in data else data['raw_sql'][:2000] 340 | } 341 | } 342 | ], 343 | "language": "sql" 344 | } 345 | }, 346 | # Compiled Code 347 | { 348 | "object": "block", 349 | "type": "heading_1", 350 | "heading_1": { 351 | "rich_text": [ 352 | { 353 | "type": "text", 354 | "text": { "content": "Compiled Code" } 355 | } 356 | ] 357 | } 358 | }, 359 | { 360 | "object": "block", 361 | "type": "code", 362 | "code": { 363 | "rich_text": [ 364 | { 365 | "type": "text", 366 | "text": { 367 | "content": data['compiled_code'][:2000] if 'compiled_code' in data else data['compiled_sql'][:2000] 368 | } 369 | } 370 | ], 371 | "language": "sql" 372 | } 373 | } 374 | ] 375 | 376 | record_obj = { 377 | "parent": { 378 | "database_id": database_id 379 | }, 380 | "properties": { 381 | "Name": { 382 | "title": [ 383 | { 384 | "text": { 385 | "content": data['name'] 386 | } 387 | } 388 | ] 389 | }, 390 | "Description": { 391 | "rich_text": [ 392 | { 393 | "text": { 394 | "content": data['description'][:2000] 395 | # notion api limit is 2k characters per rich text block 396 | } 397 | } 398 | ] 399 | }, 400 | "Owner": { 401 | "rich_text": [ 402 | { 403 | "text": { 404 | "content": str( 405 | get_owner(data, catalog_nodes, model_name) 406 | )[:2000] 407 | } 408 | } 409 | ] 410 | }, 411 | "Relation": { 412 | "rich_text": [ 413 | { 414 | "text": { 415 | "content": data['relation_name'][:2000] if data['relation_name'] else "" 416 | } 417 | } 418 | ] 419 | }, 420 | "Approx Rows": { 421 | "number": get_paths_or_empty( 422 | catalog_nodes, 423 | [[model_name, 'stats', 'num_rows', 'value'], 424 | [model_name, 'stats', 'row_count', 'value']], 425 | NUMERIC_ZERO_VALUE 426 | ) 427 | }, 428 | "Approx GB": { 429 | "number": get_paths_or_empty( 430 | catalog_nodes, 431 | [[model_name, 'stats', 'bytes', 'value'], 432 | [model_name, 'stats', 'num_bytes', 'value']], 433 | NUMERIC_ZERO_VALUE 434 | ) / 1e9 435 | }, 436 | "Depends On": { 437 | "rich_text": [ 438 | { 439 | "text": { 440 | "content": json.dumps(data['depends_on'])[:2000] 441 | } 442 | } 443 | ] 444 | }, 445 | "Tags": { 446 | "rich_text": [ 447 | { 448 | "text": { 449 | "content": json.dumps(data['tags'])[:2000] 450 | } 451 | } 452 | ] 453 | } 454 | } 455 | } 456 | 457 | ###### query to see if record already exists ###### 458 | query_obj = { 459 | "filter": { 460 | "property": "Name", 461 | "title": { 462 | "equals": data['name'] 463 | } 464 | } 465 | } 466 | record_query_resp = make_request( 467 | endpoint='databases/', 468 | querystring=f'{database_id}/query', 469 | method='POST', 470 | json=query_obj 471 | ) 472 | 473 | if record_query_resp['results']: 474 | print(f'\nupdating {model_name} record') 475 | record_id = record_query_resp['results'][0]['id'] 476 | _record_update_resp = make_request( 477 | endpoint=f'pages/{record_id}', 478 | querystring='', 479 | method='PATCH', 480 | json=record_obj 481 | ) 482 | 483 | # children can't be updated via record update, so we'll delete and re-add 484 | record_children_resp = make_request( 485 | endpoint='blocks/', 486 | querystring=f'{record_id}/children', 487 | method='GET' 488 | ) 489 | for record_child in record_children_resp['results']: 490 | record_child_id = record_child['id'] 491 | _record_child_deletion_resp = make_request( 492 | endpoint='blocks/', 493 | querystring=record_child_id, 494 | method='DELETE' 495 | ) 496 | 497 | _record_children_replacement_resp = make_request( 498 | endpoint='blocks/', 499 | querystring=f'{record_id}/children', 500 | method='PATCH', 501 | json={"children": record_children_obj} 502 | ) 503 | 504 | else: 505 | print(f'\ncreating {model_name} record') 506 | record_obj['children'] = record_children_obj 507 | _record_creation_resp = make_request( 508 | endpoint='pages/', 509 | querystring='', 510 | method='POST', 511 | json=record_obj 512 | ) 513 | 514 | 515 | if __name__ == '__main__': 516 | main() 517 | --------------------------------------------------------------------------------