├── .bumpversion.cfg ├── .cargo └── config.toml ├── .github ├── dependabot.yml └── workflows │ ├── build-wheels.sh │ ├── build.yaml │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── MANIFEST.in ├── examples ├── depgraph.py └── visitor.py ├── justfile ├── pyproject.toml ├── readme.md ├── rust-toolchain ├── sqloxide.pyi ├── sqloxide └── __init__.py ├── src ├── lib.rs └── visitor.rs └── tests ├── benchmark.py └── test_sqloxide.py /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.56 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:pyproject.toml] 7 | 8 | [bumpversion:file:Cargo.toml] 9 | -------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [net] 2 | git-fetch-with-cli = true -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "10:00" 8 | - package-ecosystem: pip 9 | directory: "/" 10 | schedule: 11 | interval: daily 12 | time: "10:00" 13 | open-pull-requests-limit: 10 14 | ignore: 15 | - dependency-name: setuptools-rust 16 | versions: 17 | - 0.12.0 18 | - dependency-name: moz-sql-parser 19 | versions: 20 | - 4.17.21027 21 | - 4.18.21031 22 | -------------------------------------------------------------------------------- /.github/workflows/build-wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e -x 3 | 4 | for PYBIN in /opt/python/cp3[5678]*/bin; do 5 | "${PYBIN}/pip" install maturin 6 | "${PYBIN}/maturin" build -i "${PYBIN}/python" --release 7 | done 8 
| 9 | for wheel in target/wheels/*.whl; do 10 | auditwheel repair "${wheel}" 11 | done -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: build-wheels 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - 'v*' 9 | workflow_dispatch: 10 | 11 | jobs: 12 | linux-wheels: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | target: [x86_64, i686, aarch64] 17 | steps: 18 | - uses: actions/checkout@v3 19 | - uses: PyO3/maturin-action@v1 20 | with: 21 | target: ${{ matrix.target }} 22 | maturin-version: latest 23 | command: build 24 | manylinux: auto 25 | args: --release --sdist -i 3.7 3.8 3.9 3.10 3.11 3.12 3.13 26 | - uses: actions/upload-artifact@v4 27 | with: 28 | name: linux-wheels-${{ matrix.target }} 29 | path: target/wheels/ 30 | 31 | osx-wheels: 32 | runs-on: ${{ matrix.os }} 33 | strategy: 34 | matrix: 35 | include: 36 | - os: macos-13 37 | python-version: 3.7 38 | - os: macos-latest 39 | python-version: 3.8 40 | - os: macos-latest 41 | python-version: 3.9 42 | - os: macos-latest 43 | python-version: "3.10" 44 | - os: macos-latest 45 | python-version: "3.11" 46 | - os: macos-latest 47 | python-version: "3.12" 48 | - os: macos-latest 49 | python-version: "3.13" 50 | steps: 51 | - uses: actions/checkout@v1 52 | - uses: actions-rs/toolchain@v1 53 | with: 54 | toolchain: nightly 55 | default: true 56 | - uses: actions/setup-python@v2 57 | with: 58 | python-version: ${{ matrix.python-version }} 59 | - name: Build wheels for Python 3.7 60 | if: matrix.python-version == '3.7' 61 | run: | 62 | rustup target add aarch64-apple-darwin 63 | python3 -m pip install maturin 64 | maturin build --release --target universal2-apple-darwin 65 | - name: Build wheels for other Python versions 66 | if: matrix.python-version != '3.7' 67 | run: | 68 | python3 -m pip install maturin 69 | maturin build --release 70 | - 
uses: actions/upload-artifact@v4 71 | with: 72 | name: osx-${{ matrix.python-version }}-wheel 73 | path: target/wheels 74 | 75 | windows-wheels: 76 | runs-on: windows-latest 77 | strategy: 78 | matrix: 79 | python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12", "3.13"] 80 | steps: 81 | - uses: actions/checkout@v1 82 | - uses: actions-rs/toolchain@v1 83 | with: 84 | toolchain: nightly 85 | default: true 86 | - uses: actions/setup-python@v2 87 | with: 88 | python-version: ${{ matrix.python-version }} 89 | - name: Build wheels 90 | run: | 91 | python -m pip install maturin 92 | maturin build --release 93 | - uses: actions/upload-artifact@v4 94 | with: 95 | name: windows-${{ matrix.python-version }}-wheel 96 | path: target/wheels 97 | 98 | collect-wheels: 99 | needs: [osx-wheels, windows-wheels, linux-wheels] 100 | runs-on: ubuntu-latest 101 | steps: 102 | - uses: actions/checkout@v2 103 | - uses: actions/download-artifact@v4 104 | 105 | - name: Display structure of downloaded files 106 | run: ls -R 107 | 108 | - run: mkdir wheels 109 | - run: mv ./linux-wheels-x86_64/* wheels 110 | - run: mv ./linux-wheels-i686/* wheels 111 | - run: mv ./linux-wheels-aarch64/* wheels 112 | - run: mv ./osx-3.13-wheel/* wheels 113 | - run: mv ./osx-3.12-wheel/* wheels 114 | - run: mv ./osx-3.11-wheel/* wheels 115 | - run: mv ./osx-3.10-wheel/* wheels 116 | - run: mv ./osx-3.9-wheel/* wheels 117 | - run: mv ./osx-3.8-wheel/* wheels 118 | - run: mv ./osx-3.7-wheel/* wheels 119 | - run: mv ./windows-3.13-wheel/* wheels 120 | - run: mv ./windows-3.12-wheel/* wheels 121 | - run: mv ./windows-3.11-wheel/* wheels 122 | - run: mv ./windows-3.10-wheel/* wheels 123 | - run: mv ./windows-3.9-wheel/* wheels 124 | - run: mv ./windows-3.8-wheel/* wheels 125 | - run: mv ./windows-3.7-wheel/* wheels 126 | 127 | - name: Upload wheels as artifact 128 | uses: actions/upload-artifact@v4 129 | with: 130 | name: all-wheels 131 | path: wheels/ 132 | retention-days: 7 133 | 134 | publish-pypi: 135 | needs: 
[collect-wheels] 136 | runs-on: ubuntu-latest 137 | if: startsWith(github.ref, 'refs/tags/v') 138 | steps: 139 | - uses: actions/download-artifact@v4 140 | with: 141 | name: all-wheels 142 | path: wheels/ 143 | 144 | - name: Publish to PyPI 145 | uses: pypa/gh-action-pypi-publish@release/v1 146 | with: 147 | password: ${{ secrets.PYPI }} 148 | packages_dir: wheels/ 149 | verify_metadata: false 150 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | release: 5 | types: [created] 6 | push: 7 | schedule: 8 | # Runs every Thursday at 23:20 UTC to avoid bit rot 9 | - cron: "20 23 * * 4" 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v1 17 | 18 | - name: Install rust toolchain 19 | uses: actions-rs/toolchain@v1 20 | with: 21 | toolchain: stable 22 | override: true 23 | components: rustfmt, clippy 24 | 25 | - name: Lint with rustfmt 26 | uses: actions-rs/cargo@v1 27 | with: 28 | command: fmt 29 | 30 | - name: Lint with clippy 31 | uses: actions-rs/cargo@v1 32 | with: 33 | command: clippy 34 | args: --all-targets --all-features 35 | 36 | - name: Test with cargo 37 | uses: actions-rs/cargo@v1.0.1 38 | with: 39 | command: test 40 | toolchain: stable 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.toptal.com/developers/gitignore/api/python,rust,vscode 3 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,rust,vscode 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | 
downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | pytestdebug.log 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | doc/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | pythonenv* 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | 139 | # pytype static type analyzer 140 | .pytype/ 141 | 142 | # profiling data 143 | .prof 144 | 145 | ### Rust ### 146 | # Generated by Cargo 147 | # will have compiled files and executables 148 | /target/ 149 | 150 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 151 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 152 | Cargo.lock 153 | 154 | *.code-workspace 155 | 156 | # End of https://www.toptal.com/developers/gitignore/api/python,rust,vscode 157 | 158 | /examples/depgraph.gv* 159 | .vscode -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.1.56 2 | 3 | - Upgrade to sqlparser-rs 0.56.0 4 | 5 | In v0.55 of sqlparser-rs, the `ObjectName` structure has been changed as shown below. Here is how to migrate. 6 | 7 | ```diff 8 | - pub struct ObjectName(pub Vec<Ident>); 9 | + pub struct ObjectName(pub Vec<ObjectNamePart>); 10 | ``` 11 | 12 | Therefore, when using the `parse_sql` function, the data structure of the table name in the return value will change. 
13 | 14 | Previously: 15 | 16 | ```json 17 | { 18 | "value": "employee", 19 | "quote_style": null, 20 | "span": 21 | { 22 | "start": 23 | { 24 | "line": 4, 25 | "column": 10 26 | }, 27 | "end": 28 | { 29 | "line": 4, 30 | "column": 18 31 | } 32 | } 33 | } 34 | ``` 35 | 36 | Now: 37 | 38 | 39 | ```json 40 | { 41 | "Identifier": 42 | { 43 | "value": "employee", 44 | "quote_style": null, 45 | "span": 46 | { 47 | "start": 48 | { 49 | "line": 4, 50 | "column": 10 51 | }, 52 | "end": 53 | { 54 | "line": 4, 55 | "column": 18 56 | } 57 | } 58 | } 59 | } 60 | ``` 61 | 62 | # 0.1.36 63 | 64 | - Upgrade to sqlparser-rs 0.36.0 65 | - Add more visitor functions 66 | - `mutate_relations` 67 | - `mutate_expressions` 68 | - `extract_expressions` 69 | - add `restore_ast` 70 | - remove ability for library to internally panic via `.expect()`, now throws only `ValueError` 71 | 72 | # 0.1.35 73 | 74 | - Added `extract_relations` function to assist in extracting table references from the AST in Rust. 75 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sqloxide" 3 | version = "0.1.56" 4 | authors = ["Will Eaton "] 5 | edition = "2021" 6 | 7 | [lib] 8 | name = "sqloxide" 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | pythonize = "0.22" 13 | serde = "1.0.171" 14 | 15 | [dependencies.pyo3] 16 | version = "0.22" 17 | features = ["extension-module"] 18 | 19 | [dependencies.sqlparser] 20 | version = "0.56.0" 21 | features = ["serde", "visitor"] 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Will Eaton 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 
deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include Cargo.toml 2 | include build.py 3 | recursive-include src * -------------------------------------------------------------------------------- /examples/depgraph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Builds a dependency graph by parsing all of the queries inside a folder 3 | of .sql files. Renders via graphviz. 
4 | """ 5 | 6 | import argparse 7 | import json 8 | import os 9 | from glob import glob 10 | from typing import List 11 | 12 | import sqloxide 13 | from graphviz import Digraph 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--path", "-p", type=str, help="The path to process queries for.") 17 | parser.add_argument("--dialect", "-d", type=str, help="The dialect to use.") 18 | 19 | def get_sql_files(path: str) -> List[str]: 20 | return glob(path + "/**/*.sql") 21 | 22 | 23 | def get_key_recursive(search_dict, field): 24 | """ 25 | Takes a dict with nested lists and dicts, 26 | and searches all dicts for a key of the field 27 | provided. 28 | 29 | - modified from: https://stackoverflow.com/a/20254842 30 | """ 31 | fields_found = [] 32 | 33 | for key, value in search_dict.items(): 34 | 35 | if key == field: 36 | fields_found.append(value) 37 | 38 | elif isinstance(value, dict): 39 | results = get_key_recursive(value, field) 40 | for result in results: 41 | fields_found.append(result) 42 | 43 | elif isinstance(value, list): 44 | for item in value: 45 | if isinstance(item, dict): 46 | more_results = get_key_recursive(item, field) 47 | for another_result in more_results: 48 | fields_found.append(another_result) 49 | 50 | return fields_found 51 | 52 | 53 | def get_tables_in_query(SQL: str, dialect: str) -> List[str]: 54 | 55 | res = sqloxide.parse_sql(sql=SQL, dialect=dialect) 56 | tables = get_key_recursive(res[0]["Query"], "Table") 57 | 58 | results = list() 59 | 60 | for table in tables: 61 | results.append(table["name"][0]["value"] + "." 
+ table["name"][1]["value"]) 62 | 63 | return results 64 | 65 | 66 | if __name__ == "__main__": 67 | 68 | args = parser.parse_args() 69 | 70 | files = get_sql_files(args.path) 71 | print(f'Parsing using dialect: {args.dialect}') 72 | 73 | result_dict = dict() 74 | 75 | for _f in files: 76 | pretty_filename = ".".join(_f.split("/")[-2:]) 77 | 78 | with open(_f, "r") as f: 79 | sql = f.read() 80 | 81 | try: 82 | tables = get_tables_in_query(SQL=sql, dialect=args.dialect) 83 | result_dict[pretty_filename] = list(set(tables.copy())) 84 | except ValueError: 85 | print(f"File: {_f} failed to parse.") 86 | 87 | dot = Digraph(engine="dot") 88 | dot.attr(rankdir="LR") 89 | dot.attr(splines="ortho") 90 | dot.node_attr['shape'] = 'box' 91 | 92 | for view, tables in result_dict.items(): 93 | view = view[:-4] 94 | dot.node(view) 95 | for table in tables: 96 | dot.edge(view, table) 97 | 98 | dot.render("./examples/depgraph.gv", view=True) 99 | -------------------------------------------------------------------------------- /examples/visitor.py: -------------------------------------------------------------------------------- 1 | from sqloxide import parse_sql, mutate_expressions, mutate_relations 2 | 3 | if __name__ == "__main__": 4 | sql = "SELECT something from somewhere where something = 1 and something_else = 2" 5 | 6 | def func(x): 7 | if "CompoundIdentifier" in x.keys(): 8 | for y in x["CompoundIdentifier"]: 9 | y["value"] = y["value"].upper() 10 | return x 11 | 12 | ast = parse_sql(sql=sql, dialect="ansi") 13 | result = mutate_expressions(parsed_query=ast, func=func) 14 | print(result) 15 | 16 | def func(x): 17 | return x.replace("somewhere", "anywhere") 18 | 19 | result = mutate_relations(parsed_query=ast, func=func) 20 | print(result) 21 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | benchmark: build 2 | uvx poetry run pytest 
tests/benchmark.py 3 | 4 | test: 5 | uvx poetry run pytest tests/ 6 | 7 | build: 8 | uvx poetry build 9 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sqloxide" 3 | version = "0.1.56" 4 | authors = [{ name = "Will Eaton", email= "" }] 5 | repository = "https://github.com/wseaton/sqloxide" 6 | license = "MIT" 7 | description = "Python bindings for sqlparser-rs" 8 | requires-python = ">=3.7" 9 | classifiers = [ 10 | "Development Status :: 3 - Alpha", 11 | "Programming Language :: Rust", 12 | "Topic :: Database", 13 | "License :: OSI Approved :: MIT License", 14 | ] 15 | 16 | [build-system] 17 | requires = ["maturin>=1.0,<2.0"] 18 | build-backend = "maturin" 19 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # sqloxide 2 | 3 | [![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/wseaton/sqloxide/ci.yml)](https://github.com/wseaton/sqloxide/actions/workflows/ci.yml)[![Downloads](https://static.pepy.tech/badge/sqloxide)](https://pepy.tech/project/sqloxide) 4 | 5 | --- 6 | 7 | - [sqloxide](#sqloxide) 8 | - [Installation](#installation) 9 | - [Usage](#usage) 10 | - [Parsing](#parsing) 11 | - [AST Rewrites](#ast-rewrites) 12 | - [Benchmarks](#benchmarks) 13 | - [Example](#example) 14 | - [Develop](#develop) 15 | 16 | --- 17 | 18 | `sqloxide` wraps rust bindings for [sqlparser-rs](https://github.com/ballista-compute/sqlparser-rs) into a python package using `pyO3`. 19 | 20 | The original goal of this project was to have a very fast, efficient, and accurate SQL parser I could use for building data lineage graphs across large code bases (think hundreds of auto-generated .sql files). 
Most existing sql parsing approaches for python are either very slow or not accurate (especially in regards to deeply nested queries, sub-selects and/or table aliases). Looking to the rust community for support, I found the excellent `sqlparser-rs` crate which is quite easy to wrap in python code. 21 | 22 | ## Installation 23 | 24 | The project provides `manylinux2014` wheels on pypi so it should be compatible with most linux distributions. Native wheels are also now available for OSX and Windows. 25 | 26 | To install from pypi: 27 | 28 | ```sh 29 | pip install sqloxide 30 | ``` 31 | 32 | ## Usage 33 | 34 | ### Parsing 35 | 36 | Parsing a SQL query is relatively straight forward: 37 | 38 | ```python 39 | from sqloxide import parse_sql 40 | 41 | sql = """ 42 | SELECT employee.first_name, employee.last_name, 43 | call.start_time, call.end_time, call_outcome.outcome_text 44 | FROM employee 45 | INNER JOIN call ON call.employee_id = employee.id 46 | INNER JOIN call_outcome ON call.call_outcome_id = call_outcome.id 47 | ORDER BY call.start_time ASC; 48 | """ 49 | 50 | output = parse_sql(sql=sql, dialect='ansi') 51 | 52 | print(output) 53 | 54 | >>> [ 55 | { 56 | "Query": { 57 | "ctes": [], 58 | "body": { 59 | "Select": { 60 | "distinct": false, 61 | "top": null, 62 | "projection": [ 63 | { 64 | "UnnamedExpr": { 65 | "CompoundIdentifier": [ 66 | { 67 | "value": "employee", 68 | "quote_style": null 69 | }, 70 | { 71 | "value": "first_name", 72 | "quote_style": null 73 | } 74 | ] 75 | } 76 | }, 77 | { 78 | "UnnamedExpr": { 79 | "CompoundIdentifier": [ 80 | { 81 | "value": "employee", 82 | "quote_style": null 83 | }, 84 | { 85 | "value": "last_name", 86 | "quote_style": null 87 | } 88 | ] 89 | } 90 | }, 91 | { 92 | "UnnamedExpr": { 93 | "CompoundIdentifier": [ 94 | { 95 | "value": "call", 96 | "quote_style": null 97 | }, 98 | { 99 | "value": "start_time", 100 | "quote_style": null 101 | } 102 | ] 103 | } 104 | }, 105 | { # OUTPUT TRUNCATED 106 | ``` 107 | 108 | Note that 
you get back what looks like a JSON document but in actual Python types; this is a typed AST that matches the sqlparser-rs AST schema. 109 | 110 | We can convert this AST back into a SQL query by running: 111 | 112 | ```python 113 | from sqloxide import restore_ast 114 | 115 | query = restore_ast(ast=output) 116 | print(query) 117 | ``` 118 | 119 | This reconstruction is helpful if you want to make manual edits to the AST in python. 120 | 121 | ### AST Rewrites 122 | 123 | If you want a more structured approach to AST edits, we also expose APIs that allow you to use the visitor pattern to make query modifications. 124 | 125 | Here is an example for mutating a subset of the expressions in the query to be SHOUTING UPPERCASE: 126 | 127 | ```python 128 | from sqloxide import parse_sql, mutate_expressions 129 | 130 | sql = "SELECT something from somewhere where something = 1 and something_else = 2" 131 | 132 | def func(x): 133 | if "CompoundIdentifier" in x.keys(): 134 | for y in x["CompoundIdentifier"]: 135 | y["value"] = y["value"].upper() 136 | return x 137 | 138 | ast = parse_sql(sql=sql, dialect="ansi") 139 | result = mutate_expressions(parsed_query=ast, func=func) 140 | print(result) 141 | --- 142 | >>> ['SELECT something FROM somewhere WHERE something = 1 AND something_else = 2'] 143 | ``` 144 | 145 | What if you needed to make a structured edit to the table name in the above query? There is an API for that as well: 146 | 147 | ```python 148 | from sqloxide import parse_sql, mutate_relations 149 | 150 | def func(x): 151 | return x.replace("somewhere", "anywhere") 152 | result = mutate_relations(parsed_query=ast, func=func) 153 | print(result) 154 | --- 155 | >>> ['SELECT something FROM anywhere WHERE something = 1 AND something_else = 2'] 156 | 157 | ``` 158 | 159 | These features combined allow for powerful semantic rewrites of queries. If you have any examples you'd like to share, please contribute back to the `examples/` folder! 
160 | 161 | ## Benchmarks 162 | 163 | We run 4 benchmarks, comparing to some python native sql parsing libraries: 164 | 165 | - `test_sqloxide` - parse query and get a python object back from rust 166 | - `test_sqlparser` - testing [sqlparse](https://pypi.org/project/sqlparse/), query -> AST 167 | - `test_mozsqlparser` - testing [moz-sql-parser](https://pypi.org/project/moz-sql-parser/), full roundtrip as in the docs, query -> JSON 168 | - `test_sqlglot` - testing [sqlglot](https://github.com/tobymao/sqlglot/), query -> AST 169 | 170 | To run them on your machine: 171 | 172 | ``` 173 | poetry run pytest tests/benchmark.py 174 | ``` 175 | 176 | ``` 177 | ------------------------------------------------------------------------------------------- benchmark: 4 tests ------------------------------------------------------------------------------------------- 178 | Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations 179 | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 180 | test_sqloxide 29.6800 (1.0) 50.4300 (1.0) 30.6219 (1.0) 0.7367 (1.0) 30.4900 (1.0) 0.2390 (1.0) 527;716 32,656.3811 (1.0) 9099 1 181 | test_sqlglot 365.8420 (12.33) 692.8950 (13.74) 377.2422 (12.32) 11.7692 (15.98) 375.7825 (12.32) 4.3145 (18.05) 62;97 2,650.8168 (0.08) 2260 1 182 | test_sqlparser 1,577.7720 (53.16) 9,751.9699 (193.38) 1,651.5547 (53.93) 355.5511 (482.64) 1,620.7315 (53.16) 30.9200 (129.37) 3;60 605.4901 (0.02) 538 1 183 | test_mozsqlparser 2,793.8400 (94.13) 12,358.7790 (245.07) 3,091.8519 (100.97) 960.4173 (>1000.0) 2,937.6310 (96.35) 243.3220 (>1000.0) 4;4 323.4308 (0.01) 316 1 184 | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 185 | ``` 186 | 187 
| ## Example 188 | 189 | The `depgraph` example reads a bunch of `.sql` files from disk using glob, and builds a dependency graph of all of the objects using graphviz. 190 | 191 | ``` 192 | poetry run python ./examples/depgraph.py --path {path/to/folder/with/queries} 193 | ``` 194 | 195 | ## Develop 196 | 197 | 1) Install `rustup` 198 | 199 | 2) `poetry install` will automatically create the venv, compile the package and install it into the venv via the build script. 200 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | stable -------------------------------------------------------------------------------- /sqloxide.pyi: -------------------------------------------------------------------------------- 1 | from typing import Any, Generic, Literal, TypeAlias, TypedDict, TypeVar 2 | 3 | Dialect: TypeAlias = Literal["generic", "ansi", "sqlite", "postgresql"] 4 | 5 | def parse_sql(sql: str, dialect: Dialect) -> list[Statement]: ... 6 | 7 | T = TypeVar("T") 8 | 9 | ObjectName: TypeAlias = list[Ident] 10 | """ 11 | A name of a table, view, custom type, etc. 12 | (possibly multi-part, i.e. db.schema.obj) 13 | """ 14 | 15 | Statement: TypeAlias = AstSelect | AstInsert | AstSetVariable | dict[str, Any] 16 | """ 17 | A top-level statement (SELECT, INSERT, CREATE, etc.) 18 | 19 | See https://docs.rs/sqlparser/0.51.0/sqlparser/ast/enum.Statement.html 20 | """ 21 | 22 | SetExpr: TypeAlias = AstSelect | AstSelect | AstQuery | AstValues | dict[str, Any] 23 | """ 24 | A node in a tree, representing a “query body” expression, 25 | roughly: `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]` 26 | 27 | See https://docs.rs/sqlparser/0.51.0/sqlparser/ast/enum.SetExpr.html 28 | """ 29 | 30 | Expr: TypeAlias = AstIdentifier | dict[str, Any] 31 | """ 32 | A SQL expression of any type. 
33 | 34 | See https://docs.rs/sqlparser/0.51.0/sqlparser/ast/enum.Expr.html 35 | """ 36 | 37 | Value: TypeAlias = dict[str, Any] 38 | """ 39 | Primitive SQL values such as number and string. 40 | 41 | See https://docs.rs/sqlparser/0.51.0/sqlparser/ast/enum.Value.html 42 | """ 43 | 44 | TableFactor: TypeAlias = dict[str, Any] 45 | """ 46 | A table name or a parenthesized subquery with an optional alias. 47 | 48 | See https://docs.rs/sqlparser/0.51.0/sqlparser/ast/enum.TableFactor.html 49 | """ 50 | 51 | JoinOperator: TypeAlias = dict[str, Any] 52 | """See https://docs.rs/sqlparser/0.51.0/sqlparser/ast/enum.JoinOperator.html""" 53 | 54 | class One(TypedDict, Generic[T]): 55 | One: T 56 | 57 | class Many(TypedDict, Generic[T]): 58 | Many: list[T] 59 | 60 | class AstSelect(TypedDict): 61 | Select: Select 62 | 63 | class AstSetVariable(TypedDict): 64 | SetVariable: SetVariable 65 | 66 | class AstInsert(TypedDict): 67 | Insert: Insert 68 | 69 | class AstIdentifier(TypedDict): 70 | """An identifier (e.g. table name or column name)""" 71 | 72 | Identifier: Ident 73 | 74 | class AstCompoundIdentifier(TypedDict): 75 | """A multi-part identifier (e.g. table_alias.column or schema.table.col)""" 76 | 77 | CompoundIdentifier: list[Ident] 78 | 79 | class AstValue(TypedDict): 80 | """A literal value, such as string, number, date or NULL.""" 81 | 82 | Value: Value 83 | 84 | class AstValues(TypedDict): 85 | """An insert VALUES clause.""" 86 | 87 | Values: Values 88 | 89 | class AstQuery(TypedDict): 90 | """ 91 | A parenthesized subquery (SELECT ...), 92 | used in an expression like SELECT (subquery) AS x or WHERE (subquery) = x 93 | """ 94 | 95 | Query: Query 96 | 97 | class AstSubquery(TypedDict): 98 | """ 99 | A parenthesized SELECT subquery. 100 | 101 | When part of a `SetExpr`, a subquery may include more set operations 102 | in its body and an optional ORDER BY / LIMIT. 
103 | """ 104 | 105 | Query: Query 106 | 107 | class AstTable(TypedDict): 108 | Table: Table 109 | 110 | class Ident(TypedDict): 111 | """An identifier, decomposed into its value or character data and the quote style.""" 112 | 113 | value: str 114 | quote_style: Any | None 115 | 116 | class SetVariable(TypedDict): 117 | """ 118 | SET = expression; 119 | SET (variable[, ...]) = (expression[, ...]); 120 | """ 121 | 122 | local: bool 123 | hivevar: bool 124 | variables: One[ObjectName] | Many[ObjectName] 125 | value: list[Expr] 126 | 127 | class Select(TypedDict("Select", {"from": list[TableWithJoins]})): 128 | select_token: Any # AttachedToken 129 | distinct: Any | None # Option 130 | top: Any | None # Option 133 | into: Any | None # Option 134 | lateral_views: list[Any] # Vec 135 | prewhere: Expr | None 136 | selection: Expr | None 137 | group_by: Any # GroupByExpr, 138 | cluster_by: list[Expr] 139 | distribute_by: list[Expr] 140 | sort_by: list[Expr] 141 | having: Expr | None 142 | named_window: list[Any] # Vec 143 | qualify: Expr | None 144 | window_before_qualify: bool 145 | value_table_mode: Any | None # Option 146 | connect_by: Any | None # Option 147 | 148 | class Insert(TypedDict("Insert", {"or": Any | None})): 149 | """ 150 | An INSERT statement. 151 | 152 | See https://docs.rs/sqlparser/0.51.0/sqlparser/ast/struct.Insert.html 153 | """ 154 | 155 | ignore: bool 156 | into: bool 157 | table_name: ObjectName 158 | table_alias: Any | None 159 | columns: list[Ident] 160 | overwrite: bool 161 | source: Query | None 162 | """A SQL query that specifies what to insert""" 163 | partitioned: Any | None 164 | after_columns: list[Any] 165 | table: bool 166 | on: dict[str, Any] | None # e.g. 
{"OnConflict": {"conflict_target": None, "action": "DoNothing"}}, 167 | returning: Any | None 168 | replace_into: bool 169 | priority: Any | None 170 | insert_alias: Any | None 171 | 172 | class Query(TypedDict("Query", {"with": Any | None})): 173 | """ 174 | The most complete variant of a SELECT query expression, 175 | optionally including WITH, UNION / other set operations, and ORDER BY. 176 | """ 177 | 178 | body: SetExpr 179 | order_by: Any | None 180 | limit: Any | None 181 | limit_by: list[Any] 182 | offset: Any | None 183 | fetch: Any | None 184 | locks: list[Any] 185 | for_clause: Any | None 186 | settings: Any | None 187 | format_clause: Any | None 188 | 189 | class Values(TypedDict): 190 | explicit_row: bool 191 | rows: list[list[Expr]] 192 | 193 | class TableWithJoins(TypedDict): 194 | relation: TableFactor 195 | joins: list[Join] 196 | 197 | class Join(TypedDict("Join", {"global": bool})): 198 | relation: TableFactor 199 | join_operator: JoinOperator 200 | 201 | class Table(TypedDict): 202 | name: ObjectName 203 | alias: Any | None # Option 204 | args: Any | None # Option 205 | with_hints: list[Expr] 206 | version: Any | None # Option 207 | with_ordinality: bool 208 | partitions: list[Ident] 209 | json_path: Any | None # Option 210 | sample: Any | None # Option 211 | -------------------------------------------------------------------------------- /sqloxide/__init__.py: -------------------------------------------------------------------------------- 1 | from .sqloxide import * 2 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use pythonize::pythonize; 2 | 3 | use pyo3::exceptions::PyValueError; 4 | use pyo3::prelude::*; 5 | 6 | use pythonize::PythonizeError; 7 | 8 | use sqlparser::ast::Statement; 9 | use sqlparser::dialect::dialect_from_str; 10 | use sqlparser::dialect::*; 11 | use sqlparser::parser::Parser; 12 | 13 | mod 
visitor; 14 | use visitor::{extract_expressions, extract_relations, mutate_expressions, mutate_relations}; 15 | 16 | /// Function to parse SQL statements from a string. Returns a list with 17 | /// one item per query statement. 18 | /// 19 | /// Available `dialects`: https://github.com/sqlparser-rs/sqlparser-rs/blob/main/src/dialect/mod.rs#L189-L206 20 | #[pyfunction] 21 | #[pyo3(text_signature = "(sql, dialect)")] 22 | fn parse_sql(py: Python, sql: String, dialect: String) -> PyResult { 23 | let chosen_dialect = dialect_from_str(dialect).unwrap_or_else(|| { 24 | println!("The dialect you chose was not recognized, falling back to 'generic'"); 25 | Box::new(GenericDialect {}) 26 | }); 27 | let parse_result = Parser::parse_sql(&*chosen_dialect, &sql); 28 | 29 | let output = match parse_result { 30 | Ok(statements) => pythonize(py, &statements).map_err(|e| { 31 | let msg = e.to_string(); 32 | PyValueError::new_err(format!("Python object serialization failed.\n\t{msg}")) 33 | })?, 34 | Err(e) => { 35 | let msg = e.to_string(); 36 | return Err(PyValueError::new_err(format!( 37 | "Query parsing failed.\n\t{msg}" 38 | ))); 39 | } 40 | }; 41 | 42 | Ok(output.into()) 43 | } 44 | 45 | /// This utility function allows reconstituting a modified AST back into a list of SQL queries.
46 | #[pyfunction] 47 | #[pyo3(text_signature = "(ast)")] 48 | fn restore_ast(_py: Python, ast: &Bound<'_, PyAny>) -> PyResult> { 49 | let parse_result: Result, PythonizeError> = pythonize::depythonize(ast); 50 | 51 | let output = match parse_result { 52 | Ok(statements) => statements, 53 | Err(e) => { 54 | let msg = e.to_string(); 55 | return Err(PyValueError::new_err(format!( 56 | "Query serialization failed.\n\t{msg}" 57 | ))); 58 | } 59 | }; 60 | 61 | Ok(output 62 | .iter() 63 | .map(std::string::ToString::to_string) 64 | .collect::>()) 65 | } 66 | 67 | #[pymodule] 68 | fn sqloxide(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 69 | m.add_function(wrap_pyfunction!(parse_sql, m)?)?; 70 | m.add_function(wrap_pyfunction!(restore_ast, m)?)?; 71 | // TODO: maybe refactor into separate module 72 | m.add_function(wrap_pyfunction!(extract_relations, m)?)?; 73 | m.add_function(wrap_pyfunction!(mutate_relations, m)?)?; 74 | m.add_function(wrap_pyfunction!(extract_expressions, m)?)?; 75 | m.add_function(wrap_pyfunction!(mutate_expressions, m)?)?; 76 | Ok(()) 77 | } 78 | -------------------------------------------------------------------------------- /src/visitor.rs: -------------------------------------------------------------------------------- 1 | use core::ops::ControlFlow; 2 | 3 | use pyo3::exceptions::PyValueError; 4 | use pyo3::prelude::*; 5 | 6 | use serde::Serialize; 7 | 8 | use sqlparser::ast::{ 9 | Statement, ObjectNamePart, {visit_expressions, visit_expressions_mut, visit_relations, visit_relations_mut}, 10 | }; 11 | 12 | // Refactored function for handling depythonization 13 | fn depythonize_query(parsed_query: &Bound<'_, PyAny>) -> Result, PyErr> { 14 | match pythonize::depythonize(parsed_query) { 15 | Ok(statements) => Ok(statements), 16 | Err(e) => { 17 | let msg = e.to_string(); 18 | Err(PyValueError::new_err(format!( 19 | "Query serialization failed.\n\t{msg}" 20 | ))) 21 | } 22 | } 23 | } 24 | 25 | fn pythonize_query_output(py: Python, output:
Vec) -> PyResult> 26 | where 27 | T: Sized + Serialize, 28 | { 29 | match pythonize::pythonize(py, &output) { 30 | Ok(p) => Ok(p.into()), 31 | Err(e) => { 32 | let msg = e.to_string(); 33 | Err(PyValueError::new_err(format!( 34 | "Python object serialization failed.\n\t{msg}" 35 | ))) 36 | } 37 | } 38 | } 39 | 40 | #[pyfunction] 41 | #[pyo3(text_signature = "(parsed_query)")] 42 | pub fn extract_relations(py: Python, parsed_query: &Bound<'_, PyAny>) -> PyResult { 43 | let statements = depythonize_query(parsed_query)?; 44 | 45 | let mut relations = Vec::new(); 46 | for statement in statements { 47 | visit_relations(&statement, |relation| { 48 | relations.push(relation.clone()); 49 | ControlFlow::<()>::Continue(()) 50 | }); 51 | } 52 | 53 | pythonize_query_output(py, relations) 54 | } 55 | 56 | #[pyfunction] 57 | #[pyo3(text_signature = "(parsed_query, func)")] 58 | pub fn mutate_relations(_py: Python, parsed_query: &Bound<'_, PyAny>, func: &Bound<'_, PyAny>) -> PyResult> { 59 | let mut statements = depythonize_query(parsed_query)?; 60 | 61 | for statement in &mut statements { 62 | if let ControlFlow::Break(err) = visit_relations_mut(statement, |table| { 63 | for section in &mut table.0 { 64 | let ObjectNamePart::Identifier(ident) = section; 65 | let val = match func.call1((ident.value.clone(),)) { 66 | Ok(val) => val, 67 | Err(e) => { 68 | let msg = e.to_string(); 69 | return ControlFlow::Break(PyValueError::new_err(format!( 70 | "Calling python function failed.\n\t{msg}" 71 | ))); 72 | } 73 | }; 74 | 75 | ident.value = val.to_string(); 76 | } 77 | ControlFlow::Continue(()) 78 | }) { return Err(err); } // the visitor returns the Break value; raise it instead of returning a half-mutated query 79 | } 80 | 81 | Ok(statements 82 | .iter() 83 | .map(std::string::ToString::to_string) 84 | .collect::>()) 85 | } 86 | 87 | #[pyfunction] 88 | #[pyo3(text_signature = "(parsed_query, func)")] 89 | pub fn mutate_expressions(py: Python, parsed_query: &Bound<'_, PyAny>, func: &Bound<'_, PyAny>) -> PyResult> { 90 | let mut statements: Vec = depythonize_query(parsed_query)?; 91 | 92 | for statement in &mut statements {
93 | visit_expressions_mut(statement, |expr| { 94 | let converted_expr = match pythonize::pythonize(py, expr) { 95 | Ok(val) => val, 96 | Err(e) => { 97 | let msg = e.to_string(); 98 | return ControlFlow::Break(PyValueError::new_err(format!( 99 | "Python object deserialization failed.\n\t{msg}" 100 | ))); 101 | } 102 | }; 103 | 104 | let func_result = match func.call1((converted_expr,)) { 105 | Ok(val) => val, 106 | Err(e) => { 107 | let msg = e.to_string(); 108 | return ControlFlow::Break(PyValueError::new_err(format!( 109 | "Calling python function failed.\n\t{msg}" 110 | ))); 111 | } 112 | }; 113 | 114 | *expr = match pythonize::depythonize(&func_result) { 115 | Ok(val) => val, 116 | Err(e) => { 117 | let msg = e.to_string(); 118 | return ControlFlow::Break(PyValueError::new_err(format!( 119 | "Python object reserialization failed.\n\t{msg}" 120 | ))); 121 | } 122 | }; 123 | 124 | ControlFlow::Continue(()) 125 | }); 126 | } 127 | 128 | Ok(statements 129 | .iter() 130 | .map(std::string::ToString::to_string) 131 | .collect::>()) 132 | } 133 | 134 | #[pyfunction] 135 | #[pyo3(text_signature = "(parsed_query)")] 136 | pub fn extract_expressions(py: Python, parsed_query: &Bound<'_, PyAny>) -> PyResult { 137 | let statements: Vec = depythonize_query(parsed_query)?; 138 | 139 | let mut expressions = Vec::new(); 140 | for statement in statements { 141 | visit_expressions(&statement, |expr| { 142 | expressions.push(expr.clone()); 143 | ControlFlow::<()>::Continue(()) 144 | }); 145 | } 146 | 147 | pythonize_query_output(py, expressions) 148 | } 149 | -------------------------------------------------------------------------------- /tests/benchmark.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from sqloxide import parse_sql 4 | import sqlparse 5 | import sqlglot 6 | import json 7 | import moz_sql_parser 8 | 9 | TEST_SQL = """ 10 | SELECT employee.first_name, employee.last_name, 11 | call.start_time, 
call.end_time, call_outcome.outcome_text 12 | FROM employee 13 | INNER JOIN call ON call.employee_id = employee.id 14 | INNER JOIN call_outcome ON call.call_outcome_id = call_outcome.id 15 | ORDER BY call.start_time ASC; 16 | """ 17 | 18 | 19 | def bench_parse_sql(): 20 | return parse_sql(sql=TEST_SQL, dialect="ansi") 21 | 22 | 23 | def bench_sqlparser(): 24 | return sqlparse.parse(TEST_SQL)[0] 25 | 26 | 27 | def bench_mozsqlparser(): 28 | return json.dumps(moz_sql_parser.parse(TEST_SQL)) 29 | 30 | 31 | def bench_sqlglot(): 32 | return sqlglot.parse(TEST_SQL, error_level=sqlglot.ErrorLevel.IGNORE) 33 | 34 | 35 | def test_sqloxide(benchmark): 36 | benchmark(bench_parse_sql) 37 | 38 | 39 | def test_sqlparser(benchmark): 40 | benchmark(bench_sqlparser) 41 | 42 | 43 | def test_mozsqlparser(benchmark): 44 | benchmark(bench_mozsqlparser) 45 | 46 | 47 | def test_sqlglot(benchmark): 48 | benchmark(bench_sqlglot) 49 | -------------------------------------------------------------------------------- /tests/test_sqloxide.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sqloxide import ( 3 | parse_sql, 4 | restore_ast, 5 | extract_relations, 6 | mutate_relations, 7 | extract_expressions, 8 | mutate_expressions, 9 | ) 10 | 11 | 12 | SQL = """ 13 | SELECT employee.first_name, employee.last_name, 14 | c.start_time, c.end_time, call_outcome.outcome_text 15 | FROM employee 16 | INNER JOIN "call"."call"."call" c ON c.employee_id = employee.id 17 | INNER JOIN call_outcome ON c.call_outcome_id = call_outcome.id 18 | ORDER BY c.start_time ASC; 19 | """ 20 | 21 | 22 | def test_parse_sql(): 23 | ast = parse_sql(sql=SQL, dialect="ansi")[0] 24 | 25 | assert isinstance(ast, dict) 26 | assert len(ast["Query"].keys()) > 0 27 | 28 | assert "order_by" in ast["Query"].keys() 29 | assert "body" in ast["Query"].keys() 30 | 31 | 32 | def test_throw_exception(): 33 | sql = """ 34 | SELECT $# as 1; 35 | """ 36 | with pytest.raises( 37 | 
ValueError, match=r"Query parsing failed.\n\tsql parser error: .+" 38 | ): 39 | _ast = parse_sql(sql=sql, dialect="ansi")[0] 40 | 41 | 42 | def test_extract_relations(): 43 | ast = parse_sql(sql=SQL, dialect="ansi") 44 | 45 | assert extract_relations(parsed_query=ast)[0][0] == { 46 | "Identifier": { 47 | "value": "employee", 48 | "quote_style": None, 49 | "span": { 50 | "start": {"line": 4, "column": 10}, 51 | "end": {"line": 4, "column": 18}, 52 | }, 53 | } 54 | } 55 | 56 | 57 | def test_mutate_relations(): 58 | def func(x): 59 | return x.replace("call", "call2") 60 | 61 | ast = parse_sql(sql=SQL, dialect="ansi") 62 | assert mutate_relations(parsed_query=ast, func=func) == [ 63 | 'SELECT employee.first_name, employee.last_name, c.start_time, c.end_time, call_outcome.outcome_text FROM employee INNER JOIN "call2"."call2"."call2" AS c ON c.employee_id = employee.id INNER JOIN call2_outcome ON c.call_outcome_id = call_outcome.id ORDER BY c.start_time ASC' 64 | ] 65 | 66 | 67 | def test_restore_ast(): 68 | """ 69 | Note, we are stripping formatting from the SQL string before comparing because 70 | formatting is not expected to be preserved. 
71 | """ 72 | sql = "SELECT employee.first_name, employee.last_name, call.start_time, call.end_time, call_outcome.outcome_text FROM employee JOIN call ON call.employee_id = employee.id JOIN call_outcome ON call.call_outcome_id = call_outcome.id ORDER BY call.start_time ASC" 73 | 74 | ast = parse_sql(sql=sql, dialect="ansi") 75 | print(ast) 76 | # testing that the query roundtrips 77 | assert sql == restore_ast(ast=ast)[0] 78 | 79 | 80 | def test_mutate_expressions(): 81 | def func(x): 82 | if "CompoundIdentifier" in x.keys(): 83 | for y in x["CompoundIdentifier"]: 84 | y["value"] = y["value"].upper() 85 | return x 86 | 87 | ast = parse_sql(sql=SQL, dialect="ansi") 88 | result = mutate_expressions(parsed_query=ast, func=func) 89 | assert result == [ 90 | 'SELECT EMPLOYEE.FIRST_NAME, EMPLOYEE.LAST_NAME, C.START_TIME, C.END_TIME, CALL_OUTCOME.OUTCOME_TEXT FROM employee INNER JOIN "call"."call"."call" AS c ON C.EMPLOYEE_ID = EMPLOYEE.ID INNER JOIN call_outcome ON C.CALL_OUTCOME_ID = CALL_OUTCOME.ID ORDER BY C.START_TIME ASC' 91 | ] 92 | 93 | 94 | def test_extract_expressions(): 95 | ast = parse_sql(sql=SQL, dialect="ansi") 96 | exprs = extract_expressions(parsed_query=ast) 97 | for expr in exprs: 98 | print("EXPR: ", expr) 99 | 100 | assert exprs[0] == { 101 | "CompoundIdentifier": [ 102 | { 103 | "value": "employee", 104 | "quote_style": None, 105 | "span": { 106 | "end": {"column": 20, "line": 2}, 107 | "start": {"column": 12, "line": 2}, 108 | }, 109 | }, 110 | { 111 | "value": "first_name", 112 | "quote_style": None, 113 | "span": { 114 | "end": {"column": 31, "line": 2}, 115 | "start": {"column": 21, "line": 2}, 116 | }, 117 | }, 118 | ] 119 | } 120 | --------------------------------------------------------------------------------