├── .gitignore ├── LICENSE ├── README.md ├── cache_middleware.py ├── data-proxy-arch.png ├── gitcommit.sh ├── main.py ├── requirements.txt ├── sample.env ├── tickit.duckdb └── vercel.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Senthilnathan Karuppaiah 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DuckDB Data API and Micro ORM 2 | 3 | Ever wondered if MotherDuck offers a REST API? This project steps in to bridge that gap, enabling you to interact with DuckDB databases through a RESTful interface without writing a single line of backend code. Designed for deployment at the edge, it's a cost-effective solution for hobbyists and small projects. 4 | 5 | The DuckDB Data API facilitates direct access to your DuckDB database, allowing for CRUD operations via HTTP. 6 | 7 | ## Motivation 8 | 9 | - **Simplicity and Rapid Prototyping**: Offers a dynamic, RESTful interface to cater to various project requirements with minimal or no coding. 10 | - **Adaptation to Serverless and Edge Computing**: Aims to provide a fitting database solution for applications operating at the internet's edge. 
11 | - **Leveraging DuckDB’s Speed**: While DuckDB excels in performance, integrating it directly into web applications posed challenges.
12 | - **Filling the Gap Left by MotherDuck**: Although MotherDuck allows DuckDB to run serverlessly, it lacked a REST interface. This project aims to bridge that gap.
13 | - **Inspiration from MongoDB's Atlas Data Proxy**: The convenience and developer-friendly nature of MongoDB's Atlas Data Proxy influenced the development of this project, promoting quick backend support for rapid prototyping.
14 | - **Cost-Effective Deployment on Vercel**: The project prioritizes a deployment strategy that incurs minimal to no costs on platforms like Vercel.
15 | 
16 | ## Design Goals
17 | 
18 | - **Adopting the ActiveRecord Pattern**: Mimics the ActiveRecord design pattern to offer an intuitive way of interacting with database records.
19 | - **Automated CRUD Operations**: Streamlines Create, Read, Update, and Delete operations to save development time and effort.
20 | - **Flexibility and Dynamic Nature**: Ensures the proxy can adapt to varying project needs without rigid constraints.
21 | - **Single-File Implementation**: Strives for a lightweight solution, ideally encapsulated within a single file for ease of use and integration.
22 | 
23 | 
24 | ## Getting Started
25 | 
26 | ### Python Environment Setup
27 | 
28 | 1. **Create a Virtual Environment**:
29 | 
30 |    ```bash
31 |    python3 -m venv env
32 |    source env/bin/activate
33 |    pip install -r requirements.txt
34 |    ```
35 | 
36 | 2. Create a `.env` file at the root of your project to configure the environment settings:
37 | 
38 |    ```env
39 |    # .env file
40 |    DUCKDB_DATABASE_URL=duckdb:///tickit.duckdb
41 |    DUCKDB_SCHEMA_NAME=main
42 |    QUERY_BLACKLIST=DELETE,DROP,TRUNCATE,ALTER
43 |    ```
44 | 
45 |    - `DUCKDB_DATABASE_URL`: Specifies the connection URL to your local DuckDB database file or to a MotherDuck connection string. Alternatively, you can set this to `:memory:` to use an in-memory database.
46 |    - `DUCKDB_SCHEMA_NAME`: Sets the default schema for database operations within the data API. If left unset, it defaults to the "main" schema.
47 |    - `QUERY_BLACKLIST`: Defines a comma-separated list of SQL keywords that the data API will reject to prevent potentially destructive operations. If this list is empty or not set, no commands will be blocked, and all types of queries will be permitted.
48 | 
49 | 3. **Install Packages** (skip this step if you already installed the requirements in step 1):
50 |    ```bash
51 |    pip install -r requirements.txt
52 |    ```
53 | 
54 | 4. **Run the project** (a quick client sketch for trying the running API follows these steps):
55 | 
56 |    ```bash
57 |    uvicorn main:app --reload
58 |    ```
59 | 
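With the server from step 4 running, the sketch below exercises the API end to end. It is an illustrative example rather than part of the repository: it assumes the bundled `tickit.duckdb` database, the `/entity/{table_name}` route shape defined in `main.py`, and standard TICKIT column names such as `eventid` and `eventname`; adjust the table, columns, and filter values to your own data.

```python
# try_api.py -- minimal, illustrative client sketch (not part of the repo).
# Assumes the API is running locally on http://localhost:8000 against tickit.duckdb.
import requests

BASE_URL = "http://localhost:8000"

# 1. Health check
print(requests.get(f"{BASE_URL}/health").json())

# 2. List rows with selection, filtering, sorting, and pagination.
#    The pagination parameter is "offset" (main.py aliases skip -> offset),
#    and ".like" filters are executed as case-insensitive ILIKE matches.
params = {
    "select": "eventid,eventname,starttime",   # assumed TICKIT columns
    "eventname.like": "%opera%",               # example filter value
    "order": "starttime asc",
    "limit": 5,
    "offset": 0,
}
resp = requests.get(f"{BASE_URL}/entity/event", params=params)
resp.raise_for_status()
payload = resp.json()
print(f"{payload['total_rows']} matching rows, page {payload['current_page']} of {payload['total_pages']}")
for row in payload["data"]:
    print(row)

# 3. Single-record routes (GET/PUT/PATCH/DELETE /entity/<table>/<id>) expect the
#    key column to be named "id"; the TICKIT tables use names like eventid,
#    so those calls are omitted from this sketch.
```

The same query-parameter conventions (`select`, `order`, `limit`, `offset`, and the filter operators described below) apply to any table exposed by the proxy.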
60 | ### Examples of `DUCKDB_DATABASE_URL` Configurations
61 | 
62 | Physical DuckDB file:
63 | 
64 | ```env
65 | DUCKDB_DATABASE_URL=duckdb:///path/to/your/database.duckdb
66 | ```
67 | In-memory DuckDB instance:
68 | 
69 | ```env
70 | DUCKDB_DATABASE_URL=duckdb:///:memory:
71 | ```
72 | 
73 | MotherDuck:
74 | 
75 | ```env
76 | DUCKDB_DATABASE_URL=duckdb:///md:[motherduck-token]@[db-name]
77 | ```
78 | 
79 | ## RESTful Routes and Actions
80 | 
81 | Interact with your DuckDB database through the following RESTful routes by replacing `entity` with your table name:
82 | 
83 | | Method | Route | Description | Query Parameter Examples |
84 | |--------|-------------------|-------------------------------------------|-------------------------------------------------------|
85 | | GET | `/entity` | List entities | `?limit=10&offset=20&select=field1,field2&order=field1 asc&field1.eq=value` |
86 | | POST | `/entity` | Create a new entity | N/A |
87 | | GET | `/entity/:id` | Get a single entity by ID | N/A |
88 | | PUT | `/entity/:id` | Replace an entity by ID (full update) | N/A |
89 | | PATCH | `/entity/:id` | Update an entity by ID (partial update) | N/A |
90 | | DELETE | `/entity/:id` | Delete an entity by ID | N/A |
91 | 
92 | ### Query Parameter Examples
93 | 
94 | - **Filtering**: `?field1.eq=value` filters the list by `field1` equal to `value`.
95 | - **Sorting**: `?order=field1 asc` sorts the list by `field1` in ascending order.
96 | - **Pagination**: `?limit=10&offset=20` limits the list to 10 entities, skipping the first 20.
97 | - **Selecting Fields**: `?select=field1,field2` selects only `field1` and `field2` to be returned in each entity in the list.
98 | 
99 | ### Supported Filter Operators
100 | 
101 | The DuckDB Data Proxy supports a range of filter operators for querying data, allowing for precise data retrieval based on specific criteria:
102 | 
103 | - `.eq`: Equals
104 | - `.neq`: Not equals
105 | - `.gt`: Greater than
106 | - `.gte`: Greater than or equal to
107 | - `.lt`: Less than
108 | - `.lte`: Less than or equal to
109 | - `.like`: Pattern matching (the current implementation maps this to DuckDB's case-insensitive `ILIKE`)
110 | - `.ilike`: Case-insensitive pattern matching (not yet parsed by `prepare_where_clauses` in `main.py`; use `.like` instead)
111 | 
112 | These operators can be used in query parameters to filter the data retrieved from the database. For example, `?name.like=%john%` would filter records where the `name` field contains "john".
113 | 
114 | The `POST /execute/sql` route is documented under "Additional Endpoints" below rather than alongside the CRUD routes, because it is a utility endpoint with a different purpose from the standard CRUD operations.
115 | 
116 | 
117 | ## Additional Endpoints
118 | 
119 | In addition to the core RESTful routes, the DuckDB Data Proxy provides several utility endpoints for diagnostics, metadata, and system health checks:
121 | 
121 | | Method | Route | Description | Query Parameter Examples |
122 | |--------|------------------------|---------------------------------------------------------------|--------------------------|
123 | | GET | `/` | Root endpoint returning a welcome message. | N/A |
124 | | GET | `/health` | Health check endpoint. | N/A |
125 | | GET | `/debug/connection` | Tests database connection. | N/A |
126 | | POST | `/execute/sql` | Execute a custom SQL query (SELECT or DDL statement). 
| N/A | 127 | 128 | The `POST /execute/sql` endpoint is for advanced users who need to execute custom SQL queries or DDL statements that are not covered by the standard CRUD operations. Please use this endpoint with caution, as improper use can affect database integrity and security. 129 | 130 | 131 | ### Complete Documentation for Metadata and Profiler Endpoints 132 | 133 | --- 134 | 135 | ### Metadata Endpoints 136 | 137 | | Method | Route | Description | 138 | |--------|----------------------------|-----------------------------------------------| 139 | | GET | `/metadata/databases` | Lists all databases in the current instance. | 140 | | GET | `/metadata/schemas` | Lists all schemas in the current database. | 141 | | GET | `/metadata/tables` | Lists all tables in the current schema. | 142 | | GET | `/metadata/columns` | Lists all columns in the current schema. | 143 | | GET | `/metadata/views` | Lists all views in the current schema. | 144 | | GET | `/metadata/constraints` | Lists all constraints in the current schema. | 145 | 146 | --- 147 | 148 | ### `/metadata/{path:path}` Combinations 149 | 150 | | Path Format | Description | 151 | |---------------------------------|-------------------------------------------------------------------| 152 | | `/metadata/{database}` | Lists all schemas in the specified database. | 153 | | `/metadata/{database}/{schema}` | Lists all tables in the specified schema of a database. | 154 | | `/metadata/{database}/{schema}/{table}` | Lists all columns in the specified table of a schema. | 155 | | `/metadata/{database}/{schema}/{table}/{column}` | Fetches metadata for the specific column in the specified table. | 156 | 157 | --- 158 | 159 | ### Table Info and Column Profiler Endpoints 160 | 161 | | Method | Route | Description | 162 | |--------|------------------------------------------|---------------------------------------------------------------| 163 | | GET | `/metadata/{catalog}/{schema}/{table}/summarize` | Fetch statistical summaries for all columns in the table. | 164 | | GET | `/metadata/{catalog}/{schema}/{table}/column/{column}/summarize` | Fetch statistical summaries for a specific column in a table. | 165 | | GET | `/profile` | Profiles a table or a specific column. Use `object=db.schema.table` or `object=db.schema.table.column`. | 166 | | GET | `/describe` | Fetches metadata for a specific object using `object=db.schema.table`. | 167 | --- 168 | 169 | ## Playground 170 | Interact with the following tables from **tickit** db: `sale`, `event`, `data`, `category`, `user`, `listing`, `venue` 171 | 172 | ### User Table API Endpoints 173 | 174 | ```plaintext 175 | GET https://duckdb-data-api.vercel.app/user?state.eq=NL&liketheatre=true&limit=10&offset=5&order=firstname%20asc - List, filter, sort and paginate users 176 | POST https://duckdb-data-api.vercel.app/user - Create a new user 177 | GET https://duckdb-data-api.vercel.app/user/{id} - Get a single user by ID 178 | PUT https://duckdb-data-api.vercel.app/user/{id} - Replace a user by ID (full update) 179 | PATCH https://duckdb-data-api.vercel.app/user/{id} - Update a user by ID (partial update) 180 | DELETE https://duckdb-data-api.vercel.app/user/{id} - Delete a user by ID 181 | ``` 182 | 183 | ## Heads Up on Limitations 184 | 185 | - **Performance Considerations**: Because of the way we jump from the edge to MotherDuck and back, and how we fetch data, especially when counting items for pagination, there might be a slight delay. 
186 | - **Primary Key Expectations**: Right now, we expect the primary key in your tables to be named "id". We know that’s not always the case, so we’re thinking of ways to work around this in future updates. 187 | 188 | ## Technology Stack 189 | 190 | 1. **FastAPI**: 191 | 2. **DuckDB and MotherDuck**: 192 | 3. **Python 3.9**: 193 | 4. **SQLAlchemy**: 194 | 195 | ## Architecture 196 | 197 | ```mermaid 198 | flowchart LR 199 | Client -->|REST API Call| FastAPI 200 | FastAPI -->|SQLAlchemy ORM| DuckDB 201 | DuckDB -->|Process Query| MotherDuck 202 | MotherDuck -->|Return Results| DuckDB 203 | DuckDB -->|ORM| FastAPI 204 | FastAPI -->|JSON Response| Client 205 | 206 | style FastAPI fill:#f9f,stroke:#333,stroke-width:2px 207 | style DuckDB fill:#bbf,stroke:#333,stroke-width:2px 208 | style MotherDuck fill:#fbf,stroke:#333,stroke-width:2px 209 | style Client fill:#dfd,stroke:#333,stroke-width:2px 210 | ``` 211 | 212 | ## Deployment in Vercel 213 | 214 | Deploying your application to Vercel can significantly simplify the process, thanks to its support for serverless functions. For a detailed guide on deploying FastAPI applications to Vercel, check out this insightful [blog post](https://dev.to/mihaiandrei97/building-a-fastapi-application-and-deploying-it-with-vercel-ho7). It walks you through the steps to ensure your application runs smoothly in a serverless environment, making your DuckDB Data Proxy accessible from anywhere. 215 | 216 | 217 | ## From Goduck to DuckDB Data Proxy 218 | 219 | ### Transitioning from Golang to Python 220 | Before diving into this Python project, I launched [Goduck](https://github.com/senthilsweb/goduck), a similar initiative built with Golang. It aimed to provide REST API interaction with DuckDB and MotherDuck, much like what we're doing here but in the Go ecosystem. 221 | 222 | ### Shifting Gears to Python 223 | While trying to deploy Goduck across various environments, including serverless platforms, I faced hurdles due to the C-go dependency of the Go duckDB driver, which made the build process tricky for different Linux systems. This challenge highlighted the benefits of Python's straightforwardness and the extensive support from its community. Here are the main insights: 224 | 225 | - **Simpler Python Driver**: Python's approach to DuckDB felt more straightforward and developer-friendly. 226 | - **Larger Python Community**: The vast Python community meant more potential users and contributors for this project. 227 | - **Inspiration from MongoDB**: MongoDB's Atlas Data Proxy, which simplifies database operations, inspired me to offer a similar experience for DuckDB users, facilitating quick backend setups for rapid prototyping. -------------------------------------------------------------------------------- /cache_middleware.py: -------------------------------------------------------------------------------- 1 | """ 2 | cache_middleware.py 3 | 4 | Implements caching for FastAPI requests to enhance performance and response times. 5 | The middleware utilizes Upstash Redis due to limitations encountered with vercel-kv 6 | in the Python environment. Vercel-kv offers native support for JavaScript but not for Python. 7 | 8 | Caching is particularly important for this application as it uses MotherDuck, a serverless 9 | version of DuckDB. While DuckDB is performant, deploying it at the edge with Vercel's hobby 10 | plan introduces constraints such as memory and RAM limitations, and a function timeout of 10 seconds. 
11 | To mitigate these limitations and improve response times, a Redis cache was implemented. 12 | Upstash Redis was chosen for its ease of integration with Vercel, offering 500 MB of free cache 13 | storage under the hobby plan. 14 | 15 | Cache keys are consistently lowercased and prefixed with 'duckdb-data-api:' to ensure 16 | uniformity and to avoid case-sensitive cache misses. Only GET requests and a specific POST 17 | request for executing SQL are cached. 18 | 19 | For the POST method specific to the "/execute/sql" route, the caching strategy involves generating 20 | a unique cache key based on the content of the request body. This is achieved by computing an MD5 checksum 21 | of the POST body, ensuring that different contents produce different cache keys, thereby accurately 22 | caching responses based on the actual query being executed. This method addresses the challenge of caching 23 | dynamic content that could vary significantly with each request. 24 | """ 25 | 26 | 27 | from fastapi import Request 28 | from starlette.middleware.base import BaseHTTPMiddleware 29 | from starlette.responses import Response 30 | from upstash_redis.asyncio import Redis 31 | import hashlib 32 | 33 | # Load environment variables 34 | from dotenv import load_dotenv 35 | load_dotenv() 36 | 37 | # Initialize Upstash Redis using environment variables 38 | redis = Redis.from_env() 39 | 40 | class CacheMiddleware(BaseHTTPMiddleware): 41 | """ 42 | Middleware to cache GET and specific POST request responses using Upstash Redis. 43 | Generates unique cache keys based on the request method, path, query parameters, and 44 | for POST requests, the content of the request body. 45 | """ 46 | 47 | async def dispatch(self, request: Request, call_next): 48 | """ 49 | Process an incoming request by checking if it's cached. If not, call the next 50 | request handler and cache the response if applicable. 
51 | """ 52 | # Construct the base cache key from the method and path 53 | base_key = f"{request.method}-{request.url.path}" 54 | 55 | # Special handling for POST to "/execute/sql" 56 | if request.method == "POST" and request.url.path == "/execute/sql": 57 | # Read and then reset the request body for hashing and processing 58 | body = await request.body() 59 | request._body = body # Reset body after reading 60 | 61 | # Create a checksum of the body to use in the cache key 62 | checksum = hashlib.md5(body).hexdigest() 63 | cache_key = f"duckdb-data-api:{base_key}?{checksum}".lower() 64 | elif request.method == "GET": 65 | # Use query parameters to distinguish GET requests 66 | cache_key = f"duckdb-data-api:{base_key}?{request.query_params}".lower() 67 | else: 68 | cache_key = None 69 | 70 | # Try to retrieve the cached response 71 | if cache_key: 72 | cached_response = await redis.get(cache_key) 73 | if cached_response: 74 | print(f"Cache hit for key: {cache_key}") 75 | return Response(content=cached_response, status_code=200, media_type='application/json') 76 | print(f"Cache miss for key: {cache_key}") 77 | 78 | # Proceed with the actual request handling if no cache is found 79 | response = await call_next(request) 80 | 81 | # Cache the response if the status code is 200 and we have a cache key 82 | if response.status_code == 200 and cache_key: 83 | body = b''.join([chunk async for chunk in response.body_iterator]) 84 | cache_content = body.decode() 85 | headers = {"Content-Length": str(len(cache_content))} 86 | await redis.set(cache_key, cache_content) 87 | print(f"Cached response for key: {cache_key}") 88 | return Response(content=cache_content, status_code=200, media_type='application/json', headers=headers) 89 | 90 | return response -------------------------------------------------------------------------------- /data-proxy-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/senthilsweb/duckdb-data-api/1da02c1ee06ae17265798ea30b2896db8fd1e1c3/data-proxy-arch.png -------------------------------------------------------------------------------- /gitcommit.sh: -------------------------------------------------------------------------------- 1 | str="no comments" 2 | git ls-files --modified | xargs git add 3 | git ls-files --deleted | xargs git rm 4 | git add -A 5 | if [ ! -z "$1" -a "$1" != " " ]; then 6 | str=$1 7 | fi 8 | git commit -m "$str" 9 | 10 | git push -u origin main -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | File Name: main.py 3 | Author(s): Sernthilnathan Karuppaiah and ChatGPT4 :-) 4 | Date: 14-Mar-2024 5 | Description: This FastAPI application serves as a data proxy to DuckDB, offering endpoints for basic database 6 | operations such as listing tables, reading table data with optional filtering, sorting, and pagination, 7 | and a debug endpoint to check database connectivity. It is designed for dynamic usage, following 8 | the ActiveRecord design pattern akin to a Rails-type microORM, and utilizes SQLAlchemy for 9 | database interaction. 
10 | """ 11 | 12 | from fastapi import FastAPI, Depends, HTTPException, Request, Query, Path, Body 13 | from fastapi.encoders import jsonable_encoder 14 | from fastapi.middleware.cors import CORSMiddleware 15 | from fastapi.responses import JSONResponse 16 | import sqlglot 17 | from sqlglot import parse_one, exp 18 | from sqlglot.optimizer import optimize 19 | from sqlalchemy import create_engine, text 20 | from sqlalchemy.orm import sessionmaker, Session 21 | from typing import List, Dict, Any 22 | from pydantic import BaseModel 23 | from datetime import datetime 24 | from cache_middleware import CacheMiddleware 25 | import os 26 | from dotenv import load_dotenv 27 | import math 28 | from decimal import Decimal 29 | from sqlalchemy.sql import text 30 | 31 | 32 | 33 | # Initialize environment variables and set HOME for duckDB compatibility in serverless environments. 34 | # Only load .env file if running locally and not in Vercel 35 | if os.environ.get('VERCEL', None) != '1': 36 | # Clear all environment variables 37 | os.environ.clear() 38 | load_dotenv() 39 | 40 | os.environ['HOME'] = '/tmp' 41 | # Initialize environment variables and set HOME for duckDB compatibility in serverless environments. 42 | # Only load .env file if running locally and not in Vercel 43 | if os.environ.get('VERCEL', None) != '1': 44 | load_dotenv() 45 | 46 | # Configuration variables 47 | DATABASE_URL = os.getenv("DUCKDB_DATABASE_URL", default="duckdb:///tickit.duckdb") 48 | print(f"DATABASE_URL = [{DATABASE_URL}]") 49 | SCHEMA_NAME = os.getenv("DUCKDB_SCHEMA_NAME", default="main") 50 | print(f"SCHEMA_NAME = [{SCHEMA_NAME}]") 51 | BLACKLIST_KEYWORDS = [keyword for keyword in os.getenv("QUERY_BLACKLIST", "").split(",") if keyword] 52 | 53 | 54 | # Database engine setup 55 | engine = create_engine(DATABASE_URL) 56 | SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) 57 | 58 | app = FastAPI() 59 | #app.add_middleware(CacheMiddleware) 60 | 61 | # Dependency to get the database session 62 | def get_db(): 63 | db = SessionLocal() 64 | try: 65 | yield db 66 | finally: 67 | db.close() 68 | 69 | app.add_middleware( 70 | CORSMiddleware, 71 | allow_origins=["*"], # Allow all origins 72 | allow_credentials=True, 73 | allow_methods=["*"], # Allow all methods 74 | allow_headers=["*"], # Allow all headers 75 | ) 76 | 77 | @app.get("/") 78 | async def root(): 79 | """Root endpoint returning welcome message.""" 80 | return {"message": "Welcome to DuckDB Data Proxy!"} 81 | 82 | @app.get("/health") 83 | async def health_check(): 84 | """Health check endpoint.""" 85 | return {"message": "I am doing great!"} 86 | 87 | @app.get("/debug/connection") 88 | def debug_connection(db: Session = Depends(get_db)): 89 | """ 90 | Debug endpoint to test database connection. 91 | 92 | Attempts a simple query to verify database connectivity. 93 | """ 94 | try: 95 | result = db.execute(text("SELECT 1")) 96 | return {"status": "success", "message": "Database connection established successfully."} 97 | except Exception as e: 98 | return {"status": "error", "message": str(e)} 99 | 100 | 101 | 102 | def prepare_where_clauses(request: Request): 103 | """ 104 | Prepares WHERE clauses for SQL queries based on request query parameters. 105 | 106 | Supports various operators like .eq, .gt, .gte, .lt, .lte, .neq, and .like. 
107 | """ 108 | where_clauses = [] 109 | params = {} 110 | for key, value in request.query_params.items(): 111 | if key not in ["select", "limit", "offset", "order"]: 112 | operator = "=" # Default operator 113 | if key.endswith(".eq"): 114 | operator = "=" 115 | key = key[:-3] 116 | elif key.endswith(".gt"): 117 | operator = ">" 118 | key = key[:-3] 119 | elif key.endswith(".gte"): 120 | operator = ">=" 121 | key = key[:-4] 122 | elif key.endswith(".lt"): 123 | operator = "<" 124 | key = key[:-3] 125 | elif key.endswith(".lte"): 126 | operator = "<=" 127 | key = key[:-4] 128 | elif key.endswith(".neq"): 129 | operator = "<>" 130 | key = key[:-4] 131 | elif key.endswith(".like"): 132 | operator = "ILIKE" 133 | key = key[:-5] 134 | where_clauses.append(f"{key} {operator} :{key}") 135 | params[key] = value 136 | return " AND ".join(where_clauses), params 137 | 138 | @app.get("/entity/{table_name}", response_model=List[Dict[str, Any]]) 139 | def get_entities(table_name: str, request: Request, select: str = Query("*"), 140 | order: str = Query(None), skip: int = Query(0, alias="offset"), 141 | limit: int = Query(100), db: Session = Depends(get_db)): 142 | """ 143 | Endpoint to read data from a specified table with optional filtering, sorting, and pagination. 144 | 145 | Validates table name against existing tables to prevent SQL injection. 146 | """ 147 | # Validate table name 148 | if table_name not in list_tables(db): 149 | raise HTTPException(status_code=404, detail="Table not found") 150 | 151 | # Construct query with optional WHERE, ORDER BY, and pagination 152 | base_query = f"SELECT {select} FROM {SCHEMA_NAME}.{table_name}" 153 | where_clauses, params = prepare_where_clauses(request) 154 | if where_clauses: 155 | base_query += f" WHERE {where_clauses}" 156 | count_query = f"SELECT COUNT(*) FROM {SCHEMA_NAME}.{table_name} WHERE {where_clauses}" 157 | 158 | else: 159 | count_query = f"SELECT COUNT(*) FROM {SCHEMA_NAME}.{table_name}" 160 | 161 | if order: 162 | base_query += f" ORDER BY {order}" 163 | base_query += " LIMIT :limit OFFSET :offset" 164 | print(f"base_query = {base_query}") 165 | params.update({"limit": limit, "offset": skip}) 166 | print(f"params = {params}") 167 | # Execute query and handle results 168 | try: 169 | result_proxy = db.execute(text(base_query), params) 170 | results = result_proxy.fetchall() 171 | # Use params for count query as well to respect WHERE conditions 172 | total_count = db.execute(text(count_query), params).scalar() 173 | page_number = math.ceil(skip / limit) + 1 174 | total_pages = math.ceil(total_count / limit) 175 | response_data = { 176 | "total_rows": total_count, 177 | "total_pages": total_pages, 178 | "limit": limit, 179 | "offset": skip, 180 | "current_page": page_number, 181 | "data": [{key: (value.isoformat() if isinstance(value, datetime) else value) 182 | for key, value in dict(zip(result_proxy.keys(), row)).items()} for row in results] 183 | } 184 | return JSONResponse(content=response_data) 185 | except Exception as e: 186 | raise HTTPException(status_code=500, detail=str(e)) 187 | 188 | @app.get("/entity/{table_name}/{id}", response_model=Dict[str, Any]) 189 | def get_entity(table_name: str, id: int = Path(..., description="The ID of the entity to retrieve"), 190 | db: Session = Depends(get_db)): 191 | """ 192 | Dynamically fetches a single entity by its ID from a specified table. 193 | 194 | Parameters: 195 | - table_name: str - The name of the table from which to retrieve the entity. 
196 | - id: int - The unique identifier of the entity to retrieve. 197 | 198 | Returns a single entity matching the given ID from the specified table, with datetime fields properly serialized. 199 | """ 200 | # Validate table name 201 | if table_name not in list_tables(db): 202 | raise HTTPException(status_code=404, detail="Table not found") 203 | 204 | query = text(f"SELECT * FROM {SCHEMA_NAME}.{table_name} WHERE id = :id") 205 | result = db.execute(query, {"id": id}).fetchone() 206 | 207 | if result is None: 208 | raise HTTPException(status_code=404, detail=f"Record [{id}] not found in [{SCHEMA_NAME}.{table_name}]") 209 | 210 | # Convert the RowProxy object to a dictionary 211 | result_dict = {key: value for key, value in result._mapping.items()} 212 | 213 | # Serialize using jsonable_encoder to handle datetime and other complex types 214 | return jsonable_encoder(result_dict) 215 | 216 | @app.delete("/entity/{table_name}/{id}", response_model=Dict[str, Any]) 217 | def delete_entity(table_name: str, id: int = Path(..., description="The ID of the entity to delete"), 218 | db: Session = Depends(get_db)): 219 | """ 220 | Deletes a single entity by its ID from a specified table. 221 | """ 222 | # Validate table name 223 | if table_name not in list_tables(db): 224 | raise HTTPException(status_code=404, detail="Table not found") 225 | 226 | # Check if the entity exists 227 | exists_query = text(f"SELECT EXISTS(SELECT 1 FROM {SCHEMA_NAME}.{table_name} WHERE id = :id)") 228 | exists = db.execute(exists_query, {"id": id}).scalar() 229 | 230 | if not exists: 231 | raise HTTPException(status_code=404, detail=f"Record [{id}] not found in [{SCHEMA_NAME}.{table_name}]") 232 | 233 | # Delete the entity 234 | delete_query = text(f"DELETE FROM {SCHEMA_NAME}.{table_name} WHERE id = :id") 235 | db.execute(delete_query, {"id": id}) 236 | db.commit() 237 | 238 | return {"message": f"Record [{id}] deleted successfully from [{SCHEMA_NAME}.{table_name}]"} 239 | 240 | @app.post("/entity/{table_name}", response_model=Dict[str, Any]) 241 | def create_entity(table_name: str, entity_data: Dict[str, Any] = Body(...), 242 | db: Session = Depends(get_db)): 243 | """ 244 | Creates a new entity in the specified table with the provided data. 245 | """ 246 | # Validate table name 247 | if table_name not in list_tables(db): 248 | raise HTTPException(status_code=404, detail="Table not found") 249 | 250 | # Constructing SQL INSERT statement dynamically based on entity_data 251 | columns = ', '.join(entity_data.keys()) 252 | values = ', '.join([f":{key}" for key in entity_data.keys()]) 253 | insert_query = text(f"INSERT INTO {SCHEMA_NAME}.{table_name} ({columns}) VALUES ({values}) RETURNING *") 254 | 255 | # Execute the query and fetch the newly created entity 256 | result = db.execute(insert_query, entity_data).fetchone() 257 | db.commit() 258 | 259 | if result is None: 260 | raise HTTPException(status_code=500, detail="Failed to create record") 261 | 262 | # Convert the RowProxy object to a dictionary 263 | result_dict = {key: value for key, value in result._mapping.items()} 264 | 265 | # Serialize using jsonable_encoder to handle datetime and other complex types 266 | return jsonable_encoder(result_dict) 267 | 268 | @app.patch("/entity/{table_name}/{id}", response_model=Dict[str, Any]) 269 | def update_entity(table_name: str, id: int, update_data: Dict[str, Any] = Body(...), 270 | db: Session = Depends(get_db)): 271 | """ 272 | Updates an existing entity in the specified table with the provided data. 
273 | """ 274 | # Validate table name 275 | if table_name not in list_tables(db): 276 | raise HTTPException(status_code=404, detail="Table not found") 277 | 278 | # First, check if the entity exists 279 | exists_query = text(f"SELECT EXISTS(SELECT 1 FROM {SCHEMA_NAME}.{table_name} WHERE id = :id)") 280 | exists = db.execute(exists_query, {"id": id}).scalar() 281 | if not exists: 282 | raise HTTPException(status_code=404, detail="Entity not found") 283 | 284 | # Constructing SQL UPDATE statement dynamically based on update_data 285 | set_clauses = ', '.join([f"{key} = :{key}" for key in update_data.keys()]) 286 | update_query = text(f"UPDATE {SCHEMA_NAME}.{table_name} SET {set_clauses} WHERE id = :id RETURNING *") 287 | 288 | # Execute the query and fetch the updated entity 289 | result = db.execute(update_query, {**update_data, "id": id}).fetchone() 290 | db.commit() 291 | 292 | if result is None: 293 | raise HTTPException(status_code=500, detail="Failed to update record [{id}] in [{table_name}]") 294 | 295 | # Convert the result row to a dict to ensure compatibility with FastAPI's response_model 296 | updated_entity = {column: value for column, value in result._mapping.items()} 297 | return updated_entity 298 | 299 | @app.put("/entity/{table_name}/{id}", response_model=Dict[str, Any]) 300 | def replace_entity(table_name: str, id: int, new_data: Dict[str, Any] = Body(...), 301 | db: Session = Depends(get_db)): 302 | 303 | if table_name not in list_tables(db): 304 | raise HTTPException(status_code=404, detail="Table not found") 305 | 306 | # First, check if the entity exists 307 | exists_query = text(f"SELECT EXISTS(SELECT 1 FROM {SCHEMA_NAME}.{table_name} WHERE id = :id)") 308 | exists = db.execute(exists_query, {"id": id}).scalar() 309 | if not exists: 310 | raise HTTPException(status_code=404, detail="Table not found") 311 | 312 | # Assuming all fields must be provided for a PUT operation, construct a dynamic UPDATE statement 313 | set_clauses = ', '.join([f"{key} = :{key}" for key in new_data.keys()]) 314 | update_query = text(f"UPDATE {SCHEMA_NAME}.{table_name} SET {set_clauses} WHERE id = :id RETURNING *") 315 | 316 | # Execute the query and fetch the updated entity 317 | result = db.execute(update_query, {**new_data, "id": id}).fetchone() 318 | db.commit() 319 | 320 | if result is None: 321 | raise HTTPException(status_code=500, detail="Failed to replace record [{id}] in [{table_name}]") 322 | 323 | # Convert the result row to a dict to ensure compatibility with FastAPI's response_model 324 | replaced_entity = {column: value for column, value in result._mapping.items()} 325 | return replaced_entity 326 | 327 | def is_query_blacklisted(query: str) -> bool: 328 | # Check if BLACKLIST_KEYWORDS is actually empty or contains only an empty string 329 | if not BLACKLIST_KEYWORDS or BLACKLIST_KEYWORDS == ['']: 330 | return False 331 | 332 | query_lower = query.lower() 333 | for keyword in BLACKLIST_KEYWORDS: 334 | # Skip empty strings which might be a result of splitting an empty environment variable 335 | if keyword and keyword in query_lower: 336 | return True 337 | return False 338 | 339 | @app.post("/execute/sql") 340 | def execute_custom_query(query: str = Body(..., embed=True), db: Session = Depends(get_db)): 341 | """ 342 | Executes a custom SQL query, which can be a SELECT statement or a DDL statement. 343 | Checks against a blacklist for prohibited keywords. 344 | 345 | Parameters: 346 | - query: str - The SQL query to execute. 
347 | 348 | If the query is a SELECT statement, returns the fetched data. 349 | For DDL statements, returns a confirmation message. 350 | """ 351 | #query = query.strip().lower() 352 | query = query.strip() 353 | if is_query_blacklisted(query): 354 | raise HTTPException(status_code=403, detail="The query contains prohibited keywords.") 355 | 356 | if query.startswith("select") or query.startswith("SELECT"): 357 | # It's a select query 358 | return execute_select_query(query, db) 359 | else: 360 | # It's a DDL query 361 | return execute_ddl_query(query, db) 362 | 363 | 364 | @app.get("/metadata/databases", response_model=List[Dict[str, Any]]) 365 | def get_md_duckdb_databases(db: Session = Depends(get_db)): 366 | return execute_metadata_query("SELECT * FROM duckdb_databases", db) 367 | 368 | @app.get("/metadata/schemas", response_model=List[Dict[str, Any]]) 369 | def get_md_duckdb_databases(db: Session = Depends(get_db)): 370 | return execute_metadata_query("SELECT * FROM duckdb_schemas", db) 371 | 372 | @app.get("/metadata/tables", response_model=List[Dict[str, Any]]) 373 | def get_md_duckdb_databases(db: Session = Depends(get_db)): 374 | return execute_metadata_query("SELECT * FROM duckdb_columns", db) 375 | 376 | @app.get("/metadata/columns", response_model=List[Dict[str, Any]]) 377 | def get_md_duckdb_databases(db: Session = Depends(get_db)): 378 | return execute_metadata_query("SELECT * FROM duckdb_columns", db) 379 | 380 | @app.get("/metadata/views", response_model=List[Dict[str, Any]]) 381 | def get_md_duckdb_databases(db: Session = Depends(get_db)): 382 | return execute_metadata_query("SELECT * FROM duckdb_views", db) 383 | 384 | @app.get("/metadata/constraints", response_model=List[Dict[str, Any]]) 385 | def get_md_duckdb_databases(db: Session = Depends(get_db)): 386 | return execute_metadata_query("SELECT * FROM duckdb_constraints", db) 387 | 388 | @app.get("/metadata/{path:path}", response_model=List[Dict[str, Any]]) 389 | def handle_metadata_routes(path: str, db: Session = Depends(get_db)): 390 | """ 391 | Handles metadata routes dynamically for DuckDB catalogs, schemas, tables, and columns. 392 | Retrieves all available fields from the information schema. 393 | """ 394 | parts = path.split("/") # Split the path into components 395 | 396 | if len(parts) == 1: # Matches /metadata/{catalog} 397 | catalog = parts[0] 398 | query = f""" 399 | SELECT * 400 | FROM information_schema.schemata 401 | WHERE catalog_name = '{catalog}'; 402 | """ 403 | 404 | elif len(parts) == 2: # Matches /metadata/{catalog}/{schema} 405 | catalog, schema = parts 406 | query = f""" 407 | SELECT * 408 | FROM information_schema.tables 409 | WHERE table_catalog = '{catalog}' AND table_schema = '{schema}'; 410 | """ 411 | 412 | elif len(parts) == 3: # Matches /metadata/{catalog}/{schema}/{table} 413 | catalog, schema, table = parts 414 | query = f""" 415 | SELECT * 416 | FROM information_schema.columns 417 | WHERE table_catalog = '{catalog}' AND table_schema = '{schema}' AND table_name = '{table}'; 418 | """ 419 | 420 | elif len(parts) == 4: # Matches /metadata/{catalog}/{schema}/{table}/{column} 421 | catalog, schema, table, column = parts 422 | query = f""" 423 | SELECT * 424 | FROM information_schema.columns 425 | WHERE table_catalog = '{catalog}' AND table_schema = '{schema}' AND table_name = '{table}' AND column_name = '{column}'; 426 | """ 427 | 428 | else: 429 | # Return a 400 error if the path format is invalid 430 | raise HTTPException(status_code=400, detail="Invalid route format. 
Check the number of parts.") 431 | 432 | # Execute the query and return results 433 | return execute_metadata_query(query, db) 434 | 435 | @app.get("/describe", response_model=List[Dict[str, Any]]) 436 | def describe_object(object: str = Query(..., description="The object to describe, in the format 'db.schema.table'"), 437 | db: Session = Depends(get_db)): 438 | """ 439 | Fetches metadata for the specified object (table). 440 | Query parameter format: 'db.schema.table'. 441 | """ 442 | # Split the object into components 443 | try: 444 | catalog, schema, table = object.split(".") 445 | except ValueError: 446 | raise HTTPException(status_code=400, detail="Invalid object format. Use 'db.schema.table'.") 447 | 448 | # Construct the query 449 | query = f"DESCRIBE TABLE {catalog}.{schema}.{table}" 450 | 451 | # Execute and return the result 452 | return execute_metadata_query(query, db) 453 | 454 | 455 | @app.get("/profile", response_model=List[Dict[str, Any]]) 456 | def profile_object(object: str = Query(..., description="The object to profile, in the format 'db.schema.table' or 'db.schema.table.column'"), 457 | db: Session = Depends(get_db)): 458 | """ 459 | Fetches profile metadata for the specified object. 460 | Query parameter format: 'db.schema.table' (for table) or 'db.schema.table.column' (for specific column). 461 | """ 462 | parts = object.split(".") 463 | if len(parts) == 3: 464 | # Table-level profile 465 | catalog, schema, table = parts 466 | query = f"SUMMARIZE TABLE {catalog}.{schema}.{table}" 467 | return execute_profile_query(query, db) 468 | elif len(parts) == 4: 469 | # Column-level profile 470 | catalog, schema, table, column = parts 471 | query = f"SUMMARIZE TABLE {catalog}.{schema}.{table}" 472 | all_columns = execute_profile_query(query, db) 473 | 474 | # Filter for the specific column 475 | column_summary = [col for col in all_columns if col["column_name"] == column] 476 | if not column_summary: 477 | raise HTTPException(status_code=404, detail=f"Column '{column}' not found in table '{table}'.") 478 | return column_summary 479 | else: 480 | raise HTTPException(status_code=400, detail="Invalid object format. Use 'db.schema.table' or 'db.schema.table.column'.") 481 | 482 | 483 | def execute_profile_query(query: str, db: Session) -> List[Dict[str, Any]]: 484 | """ 485 | Executes a profile-specific query (e.g., SUMMARIZE TABLE) and handles Decimal objects for JSON serialization. 486 | """ 487 | try: 488 | # Use SQLAlchemy's text() to wrap raw SQL queries 489 | result_proxy = db.execute(text(query)) 490 | results = result_proxy.fetchall() 491 | 492 | # Convert results to JSON-serializable format 493 | serialized_results = [] 494 | for row in results: 495 | serialized_row = {} 496 | for key, value in zip(result_proxy.keys(), row): 497 | # Handle Decimal conversion for SUMMARIZE TABLE results 498 | if isinstance(value, Decimal): 499 | serialized_row[key] = float(value) 500 | else: 501 | serialized_row[key] = value 502 | serialized_results.append(serialized_row) 503 | 504 | return serialized_results 505 | except Exception as e: 506 | # Log and raise an HTTP exception for errors 507 | raise HTTPException(status_code=500, detail=f"Error executing profile query: {str(e)}") 508 | 509 | def execute_metadata_query(query: str, db: Session) -> List[Dict[str, Any]]: 510 | """ 511 | Executes a metadata query and formats the results. 512 | 513 | Parameters: 514 | - query: str - The SQL query to execute. 515 | - db: Session - The database session to use for query execution. 
516 | 517 | Returns: 518 | - A list of dictionaries where each dictionary represents a row of query results. 519 | """ 520 | print(query) # Log the query for debugging purposes 521 | try: 522 | # Execute the query using the database session 523 | result_proxy = db.execute(text(query)) 524 | results = result_proxy.fetchall() 525 | 526 | # Convert query results into a structured format 527 | response_data = { 528 | "data": [ 529 | {key: (value.isoformat() if isinstance(value, datetime) else value) 530 | for key, value in dict(zip(result_proxy.keys(), row)).items()} 531 | for row in results 532 | ] 533 | } 534 | # Return the formatted response data as JSON 535 | return JSONResponse(content=response_data) 536 | except Exception as e: 537 | # Handle any exceptions that occur during query execution 538 | raise HTTPException(status_code=500, detail=str(e)) 539 | 540 | 541 | def execute_select_query(query: str, db: Session): 542 | 543 | print(query) 544 | 545 | try: 546 | result_proxy = db.execute(text(query)) 547 | results = result_proxy.mappings().all() # Convert to list of dictionaries 548 | # Serialize the results using jsonable_encoder to handle special data types like datetime 549 | json_compatible_data = jsonable_encoder(results) 550 | return JSONResponse(content={"data": json_compatible_data, "total_rows": len(results)}) 551 | except Exception as e: 552 | raise HTTPException(status_code=400, detail=str(e)) 553 | 554 | def execute_ddl_query(query: str, db: Session): 555 | try: 556 | db.execute(text(query)) 557 | db.commit() # Make sure to commit the transaction for DDL operations 558 | return JSONResponse(content={"message": "Query executed successfully"}) 559 | except Exception as e: 560 | db.rollback() # Rollback the transaction in case of failure 561 | raise HTTPException(status_code=400, detail=str(e)) 562 | 563 | @app.post("/sqlglot/transpile") 564 | async def sqlglot_transpile_sql(request: Request): 565 | try: 566 | # Parse JSON dynamically without a Pydantic model 567 | body = await request.json() 568 | sql = body.get("sql") 569 | transpile_to = body.get("transpile_to") 570 | 571 | if not sql: 572 | raise ValueError("No SQL provided for transpilation.") 573 | if not transpile_to: 574 | raise ValueError("No target language provided for transpilation.") 575 | 576 | # Transpile the provided SQL to the specified target language 577 | transpiled_sql = sqlglot.transpile(sql, write=transpile_to, identify=True, pretty=True)[0] 578 | return {"result_sql": transpiled_sql} 579 | except ValueError as e: 580 | raise HTTPException(status_code=400, detail=str(e)) 581 | except Exception as e: 582 | raise HTTPException(status_code=500, detail=f"An error occurred while transpiling: {e}") 583 | 584 | @app.post("/sqlglot/prettify") 585 | async def sqlglot_prettify_sql(request: Request): 586 | try: 587 | # Parse JSON dynamically without a Pydantic model 588 | body = await request.json() 589 | sql = body.get("sql") 590 | 591 | if not sql: 592 | raise ValueError("No SQL provided for prettify.") 593 | 594 | # Transpile the provided SQL to the specified target language 595 | prettified_sql = sqlglot.optimizer.optimize(sql).sql(pretty=True) 596 | return {"result_sql": prettified_sql} 597 | except ValueError as e: 598 | raise HTTPException(status_code=400, detail=str(e)) 599 | except Exception as e: 600 | raise HTTPException(status_code=500, detail=f"An error occurred while prettify: {e}") 601 | 602 | @app.post("/sqlglot/extract/column") 603 | async def sqlglot_extract_columns(request: Request): 604 | try: 605 
| body = await request.json() 606 | sql = body.get("sql") 607 | 608 | if not sql: 609 | raise ValueError("No SQL provided.") 610 | 611 | parsed_sql = parse_one(sql) 612 | 613 | # Extract columns 614 | columns = [column.alias_or_name for column in parsed_sql.find_all(exp.Column)] 615 | 616 | return {"data": columns} 617 | except ValueError as e: 618 | raise HTTPException(status_code=400, detail=str(e)) 619 | except Exception as e: 620 | raise HTTPException(status_code=500, detail=f"An error occurred while extracting columns: {e}") 621 | 622 | @app.post("/sqlglot/extract/table") 623 | async def sqlglot_extract_tables(request: Request): 624 | try: 625 | body = await request.json() 626 | sql = body.get("sql") 627 | 628 | if not sql: 629 | raise ValueError("No SQL provided.") 630 | 631 | parsed_sql = parse_one(sql) 632 | 633 | # Extract tables 634 | tables = [table.name for table in parsed_sql.find_all(exp.Table)] 635 | 636 | return {"data": tables} 637 | except ValueError as e: 638 | raise HTTPException(status_code=400, detail=str(e)) 639 | except Exception as e: 640 | raise HTTPException(status_code=500, detail=f"An error occurred while extracting tables: {e}") 641 | 642 | @app.post("/sqlglot/extract/projection") 643 | async def sqlglot_extract_projections(request: Request): 644 | try: 645 | body = await request.json() 646 | sql = body.get("sql") 647 | 648 | if not sql: 649 | raise ValueError("No SQL provided.") 650 | 651 | parsed_sql = parse_one(sql) 652 | 653 | # Extract projections 654 | projections = [] 655 | for select in parsed_sql.find_all(exp.Select): 656 | projections.extend([projection.alias_or_name for projection in select.expressions]) 657 | 658 | return {"data": projections} 659 | except ValueError as e: 660 | raise HTTPException(status_code=400, detail=str(e)) 661 | except Exception as e: 662 | raise HTTPException(status_code=500, detail=f"An error occurred while extracting projections: {e}") -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.9.4 2 | aiosignal==1.3.1 3 | annotated-types==0.6.0 4 | anyio==4.3.0 5 | async-timeout==4.0.3 6 | attrs==23.2.0 7 | certifi==2024.2.2 8 | charset-normalizer==3.3.2 9 | click==8.1.7 10 | duckdb==1.1.3 11 | duckdb_engine==0.14.0 12 | exceptiongroup==1.2.0 13 | fastapi==0.110.0 14 | frozenlist==1.4.1 15 | h11==0.14.0 16 | httpcore==1.0.5 17 | httpx==0.27.0 18 | idna==3.6 19 | multidict==6.0.5 20 | packaging==24.2 21 | pydantic==2.7.0 22 | pydantic_core==2.18.1 23 | python-dotenv==1.0.0 24 | pytz==2024.1 25 | requests==2.31.0 26 | sniffio==1.3.1 27 | SQLAlchemy==2.0.36 28 | sqlglot==26.0.1 29 | sqlglotrs==0.3.0 30 | starlette==0.36.3 31 | typing_extensions==4.12.2 32 | upstash-redis==1.0.0 33 | urllib3==2.2.1 34 | uvicorn==0.28.0 35 | yarl==1.9.4 36 | -------------------------------------------------------------------------------- /sample.env: -------------------------------------------------------------------------------- 1 | DUCKDB_DATABASE_URL=duckdb:///tickit.duckdb 2 | DUCKDB_SCHEMA_NAME= 3 | QUERY_BLACKLIST=DELETE,DROP,TRUNCATE,ALTER -------------------------------------------------------------------------------- /tickit.duckdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/senthilsweb/duckdb-data-api/1da02c1ee06ae17265798ea30b2896db8fd1e1c3/tickit.duckdb 
-------------------------------------------------------------------------------- /vercel.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 2, 3 | "builds": [ 4 | { "src": "main.py", "use": "@vercel/python" } 5 | ], 6 | "routes": [ 7 | { "src": "/(.*)", "dest": "/main.py" } 8 | ], 9 | "env": { 10 | "APP_MODULE": "main:app" 11 | } 12 | } 13 | --------------------------------------------------------------------------------