├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── docs ├── Makefile ├── _static │ ├── column.jpg │ └── table.jpg ├── basic_concepts │ ├── analyzer.rst │ ├── holder.rst │ ├── metadata_provider.rst │ ├── model.rst │ └── runner.rst ├── behind_the_scene │ ├── column-level_lineage_design.rst │ ├── dialect-awareness_lineage_design.rst │ ├── dos_and_donts.rst │ ├── how_sqllineage_work.rst │ └── why_sqllineage.rst ├── conf.py ├── first_steps │ ├── advanced_usage.rst │ ├── beyond_command_line.rst │ └── getting_started.rst ├── gear_up │ ├── configuration.rst │ └── metadata.rst ├── index.rst ├── make.bat └── release_note │ └── changelog.rst ├── hatch_build.py ├── pyproject.toml ├── sqllineage ├── __init__.py ├── cli.py ├── config.py ├── core │ ├── __init__.py │ ├── analyzer.py │ ├── holders.py │ ├── metadata │ │ ├── __init__.py │ │ ├── dummy.py │ │ └── sqlalchemy.py │ ├── metadata_provider.py │ ├── models.py │ └── parser │ │ ├── __init__.py │ │ ├── sqlfluff │ │ ├── __init__.py │ │ ├── analyzer.py │ │ ├── extractors │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── copy.py │ │ │ ├── create_insert.py │ │ │ ├── cte.py │ │ │ ├── drop.py │ │ │ ├── merge.py │ │ │ ├── noop.py │ │ │ ├── rename.py │ │ │ ├── select.py │ │ │ ├── unload.py │ │ │ └── update.py │ │ ├── models.py │ │ └── utils.py │ │ └── sqlparse │ │ ├── __init__.py │ │ ├── analyzer.py │ │ ├── handlers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── cte.py │ │ ├── source.py │ │ ├── swap_partition.py │ │ └── target.py │ │ ├── models.py │ │ └── utils.py ├── data │ └── tpcds │ │ ├── query01.sql │ │ ├── query02.sql │ │ ├── query03.sql │ │ ├── query04.sql │ │ ├── query05.sql │ │ ├── query06.sql │ │ ├── query07.sql │ │ ├── query08.sql │ │ ├── query09.sql │ │ ├── query10.sql │ │ ├── query11.sql │ │ ├── query12.sql │ │ ├── query13.sql │ │ ├── query14.sql │ │ ├── query15.sql │ │ ├── query16.sql │ │ ├── query17.sql │ │ ├── query18.sql │ │ ├── query19.sql │ │ ├── query20.sql │ │ ├── query21.sql │ │ ├── query22.sql │ │ ├── query23.sql │ │ ├── query24.sql │ │ ├── query25.sql │ │ ├── query26.sql │ │ ├── query27.sql │ │ ├── query28.sql │ │ ├── query29.sql │ │ ├── query30.sql │ │ ├── query31.sql │ │ ├── query32.sql │ │ ├── query33.sql │ │ ├── query34.sql │ │ ├── query35.sql │ │ ├── query36.sql │ │ ├── query37.sql │ │ ├── query38.sql │ │ ├── query39.sql │ │ ├── query40.sql │ │ ├── query41.sql │ │ ├── query42.sql │ │ ├── query43.sql │ │ ├── query44.sql │ │ ├── query45.sql │ │ ├── query46.sql │ │ ├── query47.sql │ │ ├── query48.sql │ │ ├── query49.sql │ │ ├── query50.sql │ │ ├── query51.sql │ │ ├── query52.sql │ │ ├── query53.sql │ │ ├── query54.sql │ │ ├── query55.sql │ │ ├── query56.sql │ │ ├── query57.sql │ │ ├── query58.sql │ │ ├── query59.sql │ │ ├── query60.sql │ │ ├── query61.sql │ │ ├── query62.sql │ │ ├── query63.sql │ │ ├── query64.sql │ │ ├── query65.sql │ │ ├── query66.sql │ │ ├── query67.sql │ │ ├── query68.sql │ │ ├── query69.sql │ │ ├── query70.sql │ │ ├── query71.sql │ │ ├── query72.sql │ │ ├── query73.sql │ │ ├── query74.sql │ │ ├── query75.sql │ │ ├── query76.sql │ │ ├── query77.sql │ │ ├── query78.sql │ │ ├── query79.sql │ │ ├── query80.sql │ │ ├── query81.sql │ │ ├── query82.sql │ │ ├── query83.sql │ │ ├── query84.sql │ │ ├── query85.sql │ │ ├── query86.sql │ │ ├── query87.sql │ │ ├── query88.sql │ │ ├── query89.sql │ 
│ ├── query90.sql │ │ ├── query91.sql │ │ ├── query92.sql │ │ ├── query93.sql │ │ ├── query94.sql │ │ ├── query95.sql │ │ ├── query96.sql │ │ ├── query97.sql │ │ ├── query98.sql │ │ └── query99.sql ├── drawing.py ├── exceptions.py ├── io.py ├── runner.py └── utils │ ├── __init__.py │ ├── constant.py │ ├── entities.py │ └── helpers.py ├── sqllineagejs ├── .eslintrc ├── eslint.config.js ├── index.html ├── package-lock.json ├── package.json ├── public │ └── favicon.ico ├── src │ ├── App.jsx │ ├── api │ │ └── client.js │ ├── app │ │ └── store.js │ ├── config.js │ ├── features │ │ ├── directory │ │ │ ├── Directory.jsx │ │ │ ├── DirectoryTreeItem.jsx │ │ │ └── directorySlice.js │ │ ├── editor │ │ │ ├── DAG.jsx │ │ │ ├── DAGDesc.jsx │ │ │ ├── Editor.jsx │ │ │ └── editorSlice.js │ │ └── widget │ │ │ ├── LoadError.jsx │ │ │ └── Loading.jsx │ ├── index.css │ └── main.jsx └── vite.config.js └── tests ├── __init__.py ├── core ├── __init__.py ├── test_cli.py ├── test_config.py ├── test_drawing.py ├── test_exception.py ├── test_holder.py ├── test_metadata_provider.py ├── test_models.py ├── test_parser.py └── test_runner.py ├── helpers.py └── sql ├── __init__.py ├── column ├── __init__.py ├── multiple_statements │ ├── __init__.py │ └── test_session_metadata.py ├── test_column_merge.py ├── test_column_select_case_when.py ├── test_column_select_cast.py ├── test_column_select_column.py ├── test_column_select_column_dialect_specific.py ├── test_column_select_column_specified_in_dml.py ├── test_column_select_expression.py ├── test_column_select_from_cte.py ├── test_column_select_from_join.py ├── test_column_select_from_subquery.py ├── test_column_select_function.py ├── test_column_select_lateral_alias_ref.py ├── test_column_select_union.py ├── test_column_update.py ├── test_metadata_target_column.py ├── test_metadata_unqualified_column.py └── test_metadata_wildcard.py └── table ├── __init__.py ├── multiple_statements ├── __init__.py ├── test_split.py ├── test_tmp_table.py ├── test_tsql_no_semicolon.py └── test_variable.py ├── test_create.py ├── test_create_dialect_specific.py ├── test_cte.py ├── test_cte_dialect_specific.py ├── test_insert.py ├── test_insert_dialect_specific.py ├── test_merge.py ├── test_merge_dialect_specific.py ├── test_other_with_lineage.py ├── test_other_with_lineage_dialect_specific.py ├── test_other_without_lineage.py ├── test_other_without_lineage_dialect_specific.py ├── test_path_dialect_specific.py ├── test_select.py ├── test_select_dialect_specific.py ├── test_update.py └── test_update_dialect_specific.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .mypy_cache 2 | .pytest_cache 3 | .tox 4 | dist 5 | htmlcov 6 | */build 7 | */node_modules 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a bug to help improve sqllineage 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | * A clear and concise description of what the bug is. 12 | 13 | **SQL** 14 | Paste the SQL text here. For example: 15 | ```sql 16 | insert into analyze select * from foo; 17 | ``` 18 | 19 | **To Reproduce** 20 | *Note here we refer to SQL provided in prior step as stored in a file named `test.sql`* 21 | 22 | - `if` CLI (Command Line Interface): provide the command you're calling and the output. 
23 | For example: 24 | ```shell 25 | sqllineage -f test.sql --dialect=ansi 26 | ``` 27 | ``` 28 | Statements(#): 1 29 | Source Tables: 30 | .foo 31 | Target Tables: 32 | .analyze 33 | ``` 34 | 35 | - `elif` API (Application Programming Interface): provide the python code you're using and the output. 36 | For example: 37 | ```python 38 | from sqllineage.runner import LineageRunner 39 | with open("test.sql") as f: 40 | sql = f.read() 41 | result = LineageRunner(sql, dialect="ansi") 42 | print(result.target_tables) 43 | ``` 44 | ``` 45 | [Table: .analyze] 46 | ``` 47 | 48 | - `elif` Web UI (Web User Interface): provide the lineage graph which could be downloaded from the page, or screenshots if there're components other than the lineage graph that's related to this bug. 49 | 50 | - `else`: whatever other ways to reproduce this bug. 51 | 52 | **Expected behavior** 53 | A clear and concise description of what you expected to happen, and the output in accordance with the `To Reproduce` section. 54 | 55 | **Python version (available via `python --version`)** 56 | - 3.8.17 57 | - 3.9.18 58 | - 3.10.13 59 | - 3.11.5 60 | - etc. 61 | 62 | **SQLLineage version (available via `sqllineage --version`):** 63 | - 1.3.8 64 | - 1.4.7 65 | - etc. 66 | 67 | **Additional context** 68 | Add any other context about the problem here. 69 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for sqllineage 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: build 5 | 6 | on: 7 | push: 8 | pull_request: 9 | branches: 10 | - master 11 | schedule: 12 | - cron: '42 0 * * *' 13 | 14 | jobs: 15 | build: 16 | 17 | runs-on: ${{ matrix.os }} 18 | strategy: 19 | matrix: 20 | os: [ubuntu-latest, macos-latest, windows-latest] 21 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | - name: Set up NodeJS 30 | uses: actions/setup-node@v3 31 | with: 32 | node-version: '20' 33 | - name: Install 34 | run: pip install tox codecov 35 | - name: Script 36 | run: tox -e py 37 | - name: After Success 38 | uses: codecov/codecov-action@v4 39 | with: 40 | token: ${{ secrets.CODECOV_TOKEN }} 41 | verbose: true 42 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - uses: actions/setup-python@v4 16 | with: 17 | python-version: '3.x' 18 | - name: Install dependencies and build distribution 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install build 22 | python -m build 23 | - uses: actions/upload-artifact@v4 24 | with: 25 | path: ./dist 26 | pypi-publish: 27 | needs: ['build'] 28 | runs-on: ubuntu-latest 29 | environment: release 30 | permissions: 31 | id-token: write 32 | steps: 33 | - uses: actions/download-artifact@v4 34 | - name: Publish package distributions to PyPI 35 | uses: pypa/gh-action-pypi-publish@release/v1 36 | with: 37 | packages-dir: artifact/ 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # dependencies 107 | node_modules 108 | .pnp 109 | .pnp.js 110 | 111 | # misc 112 | .DS_Store 113 | .env.local 114 | .env.development.local 115 | .env.test.local 116 | .env.production.local 117 | 118 | npm-debug.log* 119 | yarn-debug.log* 120 | yarn-error.log* 121 | 122 | # Optional eslint cache 123 | .eslintcache 124 | 125 | # pycharm 126 | .idea/ 127 | 128 | # local testing sql files 129 | test*.sql 130 | 131 | # sqlite database generated by test cases 132 | *.db 133 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 24.1.0 4 | hooks: 5 | - id: black 6 | language_version: python3.9 7 | - repo: https://github.com/astral-sh/ruff-pre-commit 8 | rev: v0.9.9 9 | hooks: 10 | - id: ruff 11 | - repo: https://github.com/pre-commit/mirrors-mypy 12 | rev: v1.8.0 13 | hooks: 14 | - id: mypy 15 | additional_dependencies: 16 | - sqlfluff 17 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | fail_on_warning: true 18 | 19 | # Optionally build your docs in additional formats such as PDF and ePub 20 | formats: all 21 | 22 | # We recommend specifying your dependencies to enable reproducible builds: 23 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 24 | python: 25 | install: 26 | - method: pip 27 | path: . 
28 | extra_requirements: 29 | - docs 30 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nikolaik/python-nodejs:python3.10-nodejs18-slim 2 | 3 | # copy source files to docker image 4 | ARG CWD=/mnt/sqllineage 5 | ADD sqllineage/ ${CWD}/sqllineage 6 | ADD sqllineagejs/ ${CWD}/sqllineagejs 7 | COPY pyproject.toml setup.py README.md ${CWD}/ 8 | WORKDIR ${CWD} 9 | 10 | # install and remove all source code 11 | RUN pip install ${CWD} \ 12 | && rm -rf ${CWD}/* 13 | 14 | # Run the image as a non-root user 15 | RUN adduser --quiet sqllineage 16 | USER sqllineage 17 | 18 | # $PORT environment variable will be passed with --env in docker run command 19 | CMD sqllineage -g -H 0.0.0.0 -p $PORT 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/column.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/docs/_static/column.jpg -------------------------------------------------------------------------------- /docs/_static/table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/docs/_static/table.jpg -------------------------------------------------------------------------------- /docs/basic_concepts/analyzer.rst: -------------------------------------------------------------------------------- 1 | ******** 2 | Analyzer 3 | ******** 4 | 5 | LineageAnalyzer is an abstract class that defines the core processing logic for single-statement SQL analysis. 6 | 7 | Each parser implementation inherits LineageAnalyzer, does parser-specific analysis based on the AST it generates, 8 | and stores the result in ``StatementLineageHolder``. 9 | 10 | LineageAnalyzer 11 | ======================================== 12 | 13 | .. autoclass:: sqllineage.core.analyzer.LineageAnalyzer 14 | :members: 15 | -------------------------------------------------------------------------------- /docs/basic_concepts/holder.rst: -------------------------------------------------------------------------------- 1 | ****** 2 | Holder 3 | ****** 4 | 5 | LineageHolder is an abstraction to hold the lineage result analyzed by LineageAnalyzer at different levels. 6 | 7 | At the bottom, we have :class:`sqllineage.core.holders.SubQueryLineageHolder` to hold lineage at subquery level. 8 | This is used internally by :class:`sqllineage.core.analyzer.LineageAnalyzer`. 9 | 10 | LineageAnalyzer generates :class:`sqllineage.core.holders.StatementLineageHolder` 11 | as the result of lineage at SQL statement level. 12 | 13 | To assemble multiple :class:`sqllineage.core.holders.StatementLineageHolder` into a DAG-based data structure serving 14 | the final output, we have :class:`sqllineage.core.holders.SQLLineageHolder`. 15 | 16 | 17 | SubQueryLineageHolder 18 | ============================================== 19 | 20 | .. autoclass:: sqllineage.core.holders.SubQueryLineageHolder 21 | :members: 22 | 23 | 24 | StatementLineageHolder 25 | ============================================== 26 | 27 | .. autoclass:: sqllineage.core.holders.StatementLineageHolder 28 | :members: 29 | 30 | 31 | SQLLineageHolder 32 | ============================================== 33 | 34 | .. autoclass:: sqllineage.core.holders.SQLLineageHolder 35 | :members: 36 | -------------------------------------------------------------------------------- /docs/basic_concepts/metadata_provider.rst: -------------------------------------------------------------------------------- 1 | **************** 2 | MetaDataProvider 3 | **************** 4 | 5 | sqllineage.core.metadata_provider.MetaDataProvider 6 | ================================================== 7 | 8 | ..
autoclass:: sqllineage.core.metadata_provider.MetaDataProvider 9 | :members: 10 | -------------------------------------------------------------------------------- /docs/basic_concepts/model.rst: -------------------------------------------------------------------------------- 1 | ***** 2 | Model 3 | ***** 4 | 5 | Several data classes are defined in this module. 6 | 7 | Schema 8 | ============================= 9 | 10 | .. autoclass:: sqllineage.core.models.Schema 11 | 12 | 13 | Table 14 | ============================= 15 | 16 | .. autoclass:: sqllineage.core.models.Table 17 | 18 | 19 | SubQuery 20 | ============================= 21 | 22 | .. autoclass:: sqllineage.core.models.SubQuery 23 | 24 | 25 | Column 26 | ============================= 27 | 28 | .. autoclass:: sqllineage.core.models.Column 29 | -------------------------------------------------------------------------------- /docs/basic_concepts/runner.rst: -------------------------------------------------------------------------------- 1 | ****** 2 | Runner 3 | ****** 4 | 5 | LineageRunner is the entry point for SQLLineage core processing logic. After parsing command-line options, a string 6 | representation of SQL statements will be fed to LineageRunner for processing. From a bird's-eye view, it contains 7 | three steps: 8 | 9 | 1. Calling the ``sqllineage.utils.helpers.split`` function to split string-based SQL statements into a list of ``str`` statements. 10 | 11 | 2. Calling :class:`sqllineage.core.analyzer.LineageAnalyzer` to analyze each single-statement SQL string and get a list of 12 | :class:`sqllineage.core.holders.StatementLineageHolder`. 13 | 14 | 3. Calling the :class:`sqllineage.core.holders.SQLLineageHolder.of` function to assemble the list of 15 | :class:`sqllineage.core.holders.StatementLineageHolder` into one :class:`sqllineage.core.holders.SQLLineageHolder`. 16 | 17 | :class:`sqllineage.core.holders.SQLLineageHolder` then serves the lineage summary, in text or in visualization 18 | form. 19 | 20 | sqllineage.runner.LineageRunner 21 | =============================== 22 | 23 | .. autoclass:: sqllineage.runner.LineageRunner 24 | :members: 25 | :special-members: __str__ 26 | 27 | 28 | sqllineage.cli.main 29 | ====================== 30 | 31 | .. autofunction:: sqllineage.cli.main 32 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | import os 20 | import sys 21 | from datetime import datetime 22 | 23 | sys.path.insert(0, os.path.abspath("..")) 24 | 25 | from sqllineage import NAME, VERSION # noqa 26 | 27 | project = NAME 28 | copyright = f"2019-{datetime.now().year}, Reata" # noqa 29 | author = "Reata" 30 | 31 | # The full version, including alpha/beta/rc tags 32 | release = version = VERSION 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # Add any Sphinx extension module names here, as strings. They can be 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 38 | # ones. 39 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.viewcode"] 40 | 41 | # Both the class’ and the __init__ method’s docstring are concatenated and inserted. 42 | autoclass_content = "both" 43 | 44 | autodoc_default_options = {"member-order": "bysource"} 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ["_templates"] 48 | 49 | # List of patterns, relative to source directory, that match files and 50 | # directories to ignore when looking for source files. 51 | # This pattern also affects html_static_path and html_extra_path. 52 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 53 | 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | html_theme = "sphinx_rtd_theme" 61 | 62 | # Add any paths that contain custom static files (such as style sheets) here, 63 | # relative to this directory. They are copied after the builtin static files, 64 | # so a file named "default.css" will overwrite the builtin "default.css". 65 | html_static_path = ["_static"] 66 | -------------------------------------------------------------------------------- /docs/first_steps/beyond_command_line.rst: -------------------------------------------------------------------------------- 1 | ******************* 2 | Beyond Command Line 3 | ******************* 4 | 5 | Since sqllineage is a Python package, after installation, you can also import it and use the Python API to achieve 6 | the same functionality. 7 | 8 | .. 
code-block:: python 9 | 10 | >>> from sqllineage.runner import LineageRunner 11 | >>> sql = "insert into db1.table11 select * from db2.table21 union select * from db2.table22;" 12 | >>> sql += "insert into db3.table3 select * from db1.table11 join db1.table12;" 13 | >>> result = LineageRunner(sql) 14 | # To show lineage summary 15 | >>> print(result) 16 | Statements(#): 2 17 | Source Tables: 18 | db1.table12 19 | db2.table21 20 | db2.table22 21 | Target Tables: 22 | db3.table3 23 | Intermediate Tables: 24 | db1.table11 25 | # To parse all the source tables 26 | >>> for tbl in result.source_tables: print(tbl) 27 | db1.table12 28 | db2.table21 29 | db2.table22 30 | # likewise for target tables 31 | >>> for tbl in result.target_tables: print(tbl) 32 | db3.table3 33 | # To pop up a webserver for visualization 34 | >>> result.draw() 35 | -------------------------------------------------------------------------------- /docs/first_steps/getting_started.rst: -------------------------------------------------------------------------------- 1 | *************** 2 | Getting Started 3 | *************** 4 | 5 | Install via PyPI 6 | ================== 7 | 8 | To install the package via ``pip`` (or add it to your ``requirements.txt`` file), run:: 9 | 10 | pip install sqllineage 11 | 12 | 13 | Install via GitHub 14 | ================== 15 | 16 | If you want the latest development version, you can install directly from GitHub:: 17 | 18 | pip install git+https://github.com/reata/sqllineage.git 19 | 20 | 21 | .. note:: 22 | Installation from GitHub (or source code) requires **NodeJS/npm** to build the frontend code, while for PyPI, 23 | the frontend code is already pre-built so Python/pip is enough. 24 | 25 | SQLLineage in Command Line 26 | ======================================= 27 | 28 | After installation, you will get a `sqllineage` command. It has two major options: 29 | 30 | - -e option lets you pass a quoted query string as SQL statements 31 | - -f option lets you pass a file that contains SQL statements 32 | 33 | .. code-block:: bash 34 | 35 | $ sqllineage -e "insert into table_foo select * from table_bar union select * from table_baz" 36 | Statements(#): 1 37 | Source Tables: 38 | .table_bar 39 | .table_baz 40 | Target Tables: 41 | .table_foo 42 | 43 | 44 | .. code-block:: bash 45 | 46 | $ sqllineage -f foo.sql 47 | Statements(#): 1 48 | Source Tables: 49 | .table_bar 50 | .table_baz 51 | Target Tables: 52 | .table_foo 53 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /hatch_build.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hatch build hook for building frontend assets. 3 | """ 4 | 5 | import os 6 | import platform 7 | import shlex 8 | import shutil 9 | import subprocess 10 | from pathlib import Path 11 | 12 | from hatchling.builders.hooks.plugin.interface import BuildHookInterface 13 | 14 | 15 | class FrontendBuildHook(BuildHookInterface): 16 | """ 17 | A Hatch build hook to build frontend assets using npm. 18 | This hook runs during the build process to ensure that the frontend assets are built and included in the package. 19 | """ 20 | 21 | PLUGIN_NAME = "frontend" 22 | 23 | def initialize(self, version, build_data): 24 | """ 25 | This hook builds frontend assets in the following scenarios: 26 | 1. Building wheel distribution (python -m build --wheel) 27 | 2. Installing from GitHub (pip install git+https://github.com/reata/sqllineage.git). pip will build sdist first, 28 | then install from sdist. sdist itself does not include built static files, same as source code repo. 29 | Installation from sdist will trigger this hook. (by building a wheel from sdist) 30 | """ 31 | if "READTHEDOCS" in os.environ: 32 | return 33 | py_path = Path("sqllineage") 34 | static_folder = "build" 35 | static_path = py_path / static_folder 36 | js_path = Path("sqllineagejs") 37 | use_shell = True if platform.system() == "Windows" else False 38 | try: 39 | # install npm dependencies 40 | subprocess.check_call( 41 | shlex.split("npm install"), cwd=str(js_path), shell=use_shell 42 | ) 43 | # build the frontend assets 44 | subprocess.check_call( 45 | shlex.split("npm run build"), cwd=str(js_path), shell=use_shell 46 | ) 47 | # move the built assets to the Python package directory 48 | source_build_path = js_path / static_folder 49 | if static_path.exists(): 50 | shutil.rmtree(str(static_path)) 51 | shutil.move(str(source_build_path), str(static_path)) 52 | # add all files in the static_path to build_data artifacts 53 | for file_path in static_path.rglob("*"): 54 | if file_path.is_file(): 55 | relative_path = file_path.relative_to(Path(".")) 56 | build_data.setdefault("artifacts", []).append(str(relative_path)) 57 | except subprocess.CalledProcessError as e: 58 | raise RuntimeError(f"Frontend build failed: {e}") from e 59 | except Exception as e: 60 | raise RuntimeError( 61 | f"An unexpected error occurred during frontend build: {e}" 62 | ) from e 63 | -------------------------------------------------------------------------------- /sqllineage/__init__.py: -------------------------------------------------------------------------------- 1 | NAME = "sqllineage" 2 | VERSION = "1.5.5" 3 | DEFAULT_LOGGING = { 4 | "version": 1, 5 | "disable_existing_loggers": False, 6 | "formatters": {"default": {"format": "%(levelname)s: %(message)s"}}, 7 | "handlers": { 8 | "console": { 9 | "level": "WARNING", 10 | "class": "logging.StreamHandler", 11 | "formatter": "default", 12 | } 13 | }, 14 | "loggers": { 15 | "": { 16 | "handlers": ["console"], 17 | "level": "WARNING", 18 | "propagate": False, 19 | "filters": [], 20 | }, 21 | "werkzeug": { 22 | "handlers": 
["console"], 23 | "level": "ERROR", 24 | "propagate": False, 25 | "filters": [], 26 | }, 27 | }, 28 | } 29 | 30 | STATIC_FOLDER = "build" 31 | DEFAULT_HOST = "localhost" 32 | DEFAULT_PORT = 5000 33 | SQLPARSE_DIALECT = "non-validating" 34 | DEFAULT_DIALECT = "ansi" 35 | -------------------------------------------------------------------------------- /sqllineage/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/sqllineage/core/__init__.py -------------------------------------------------------------------------------- /sqllineage/core/analyzer.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | from sqllineage.core.holders import StatementLineageHolder 4 | from sqllineage.core.metadata_provider import MetaDataProvider 5 | 6 | 7 | class LineageAnalyzer: 8 | """SQL Statement Level Lineage Analyzer 9 | Parser-specific implementations should inherit this class and implement the analyze method 10 | """ 11 | 12 | PARSER_NAME: str = "" 13 | SUPPORTED_DIALECTS: list[str] = [] 14 | 15 | @abstractmethod 16 | def analyze( 17 | self, sql: str, metadata_provider: MetaDataProvider 18 | ) -> StatementLineageHolder: 19 | """ 20 | Analyze single-statement SQL and store the result into StatementLineageHolder. 21 | 22 | :param sql: single-statement SQL string to be processed 23 | :param metadata_provider: :class:`sqllineage.core.metadata_provider.MetaDataProvider` provides metadata on 24 | tables to help lineage analysis 25 | :return: :class:`sqllineage.core.holders.StatementLineageHolder` 26 | """ 27 | -------------------------------------------------------------------------------- /sqllineage/core/metadata/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/sqllineage/core/metadata/__init__.py -------------------------------------------------------------------------------- /sqllineage/core/metadata/dummy.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from sqllineage.core.metadata_provider import MetaDataProvider 4 | 5 | 6 | class DummyMetaDataProvider(MetaDataProvider): 7 | """ 8 | A dummy MetaDataProvider that accepts metadata as a dict 9 | """ 10 | 11 | def __init__(self, metadata: Optional[dict[str, list[str]]] = None): 12 | """ 13 | :param metadata: a dict with schema.table name as key and a list of unqualified column names as value 14 | """ 15 | super().__init__() 16 | self.metadata = metadata if metadata is not None else {} 17 | 18 | def _get_table_columns(self, schema: str, table: str, **kwargs) -> list[str]: 19 | return self.metadata.get(f"{schema}.{table}", []) 20 | 21 | def __bool__(self): 22 | return len(self.metadata) > 0 23 | -------------------------------------------------------------------------------- /sqllineage/core/metadata/sqlalchemy.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any, Optional 3 | 4 | from sqlalchemy import MetaData, Table, create_engine, make_url 5 | from sqlalchemy.exc import NoSuchModuleError, NoSuchTableError, OperationalError 6 | 7 | from sqllineage.core.metadata_provider import MetaDataProvider 8 | from sqllineage.exceptions import MetaDataProviderException 9 | 10 |
logger = logging.getLogger(__name__) 11 | 12 | 13 | class SQLAlchemyMetaDataProvider(MetaDataProvider): 14 | """ 15 | SQLAlchemyMetaDataProvider queries metadata from database using SQLAlchemy 16 | """ 17 | 18 | def __init__(self, url: str, engine_kwargs: Optional[dict[str, Any]] = None): 19 | """ 20 | :param url: sqlalchemy url 21 | :param engine_kwargs: a dictionary of keyword arguments that will be passed to sqlalchemy create_engine 22 | """ 23 | super().__init__() 24 | self.metadata_obj = MetaData() 25 | try: 26 | if engine_kwargs is None: 27 | engine_kwargs = {} 28 | self.engine = create_engine(url, **engine_kwargs) 29 | except NoSuchModuleError as e: 30 | u = make_url(url) 31 | raise MetaDataProviderException( 32 | f"SQLAlchemy dialect driver {u.drivername} is not installed correctly" 33 | ) from e 34 | try: 35 | self.engine.connect() 36 | except OperationalError as e: 37 | raise MetaDataProviderException(f"Could not connect to {url}") from e 38 | 39 | def __del__(self): 40 | # dispose the engine to close all connections 41 | if hasattr(self, "engine") and self.engine is not None: 42 | self.engine.dispose() 43 | 44 | def _get_table_columns(self, schema: str, table: str, **kwargs) -> list[str]: 45 | columns = [] 46 | try: 47 | sqlalchemy_table = Table( 48 | table, self.metadata_obj, schema=schema, autoload_with=self.engine 49 | ) 50 | columns = [c.name for c in sqlalchemy_table.columns] 51 | except (NoSuchTableError, OperationalError): 52 | logger.warning( 53 | "error listing columns for table %s.%s in %s, return empty list instead", 54 | schema, 55 | table, 56 | self.engine.url, 57 | ) 58 | return columns 59 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlfluff/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/sqllineage/core/parser/sqlfluff/__init__.py -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlfluff/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | import pkgutil 4 | 5 | # import each module so that BaseExtractor's __subclasses__ will work 6 | for module in pkgutil.iter_modules([os.path.dirname(__file__)]): 7 | importlib.import_module(__name__ + "." 
+ module.name) 8 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlfluff/extractors/copy.py: -------------------------------------------------------------------------------- 1 | from sqlfluff.core.parser import BaseSegment 2 | 3 | from sqllineage.core.holders import StatementLineageHolder 4 | from sqllineage.core.models import Path 5 | from sqllineage.core.parser.sqlfluff.extractors.base import BaseExtractor 6 | from sqllineage.core.parser.sqlfluff.utils import ( 7 | find_from_expression_element, 8 | list_child_segments, 9 | ) 10 | from sqllineage.utils.entities import AnalyzerContext 11 | from sqllineage.utils.helpers import escape_identifier_name 12 | 13 | 14 | class CopyExtractor(BaseExtractor): 15 | """ 16 | Copy statement lineage extractor 17 | """ 18 | 19 | SUPPORTED_STMT_TYPES = [ 20 | "copy_statement", 21 | "copy_into_table_statement", 22 | ] 23 | 24 | def extract( 25 | self, statement: BaseSegment, context: AnalyzerContext 26 | ) -> StatementLineageHolder: 27 | holder = StatementLineageHolder() 28 | src_flag = tgt_flag = False 29 | for segment in list_child_segments(statement): 30 | if segment.type == "from_clause": 31 | if from_expression_element := find_from_expression_element(segment): 32 | for table_expression in from_expression_element.get_children( 33 | "table_expression" 34 | ): 35 | if storage_location := table_expression.get_child( 36 | "storage_location" 37 | ): 38 | holder.add_read(Path(storage_location.raw)) 39 | elif segment.type == "keyword": 40 | if segment.raw_upper in ["COPY", "INTO"]: 41 | tgt_flag = True 42 | elif segment.raw_upper == "FROM": 43 | src_flag = True 44 | continue 45 | 46 | if tgt_flag: 47 | if table := self.find_table(segment): 48 | holder.add_write(table) 49 | tgt_flag = False 50 | if src_flag: 51 | if segment.type in ["literal", "storage_location"]: 52 | path = Path(escape_identifier_name(segment.raw)) 53 | holder.add_read(path) 54 | src_flag = False 55 | 56 | return holder 57 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlfluff/extractors/cte.py: -------------------------------------------------------------------------------- 1 | from sqlfluff.core.parser import BaseSegment 2 | 3 | from sqllineage.core.holders import SubQueryLineageHolder 4 | from sqllineage.core.parser.sqlfluff.extractors.base import BaseExtractor 5 | from sqllineage.core.parser.sqlfluff.extractors.create_insert import ( 6 | CreateInsertExtractor, 7 | ) 8 | from sqllineage.core.parser.sqlfluff.extractors.select import SelectExtractor 9 | from sqllineage.core.parser.sqlfluff.extractors.update import UpdateExtractor 10 | from sqllineage.core.parser.sqlfluff.models import SqlFluffSubQuery 11 | from sqllineage.core.parser.sqlfluff.utils import list_child_segments 12 | from sqllineage.utils.entities import AnalyzerContext 13 | 14 | 15 | class CteExtractor(BaseExtractor): 16 | """ 17 | CTE queries lineage extractor 18 | """ 19 | 20 | SUPPORTED_STMT_TYPES = ["with_compound_statement"] 21 | 22 | def extract( 23 | self, 24 | statement: BaseSegment, 25 | context: AnalyzerContext, 26 | ) -> SubQueryLineageHolder: 27 | holder = self._init_holder(context) 28 | subqueries = [] 29 | for segment in list_child_segments(statement): 30 | if segment.type in ["select_statement", "set_expression"]: 31 | holder |= self.delegate_to( 32 | SelectExtractor, 33 | segment, 34 | AnalyzerContext( 35 | cte=holder.cte, 36 | write=holder.write, 37 | write_columns=holder.write_columns, 38 | 
), 39 | ) 40 | elif segment.type == "insert_statement": 41 | holder |= self.delegate_to( 42 | CreateInsertExtractor, segment, AnalyzerContext(cte=holder.cte) 43 | ) 44 | elif segment.type == "update_statement": 45 | holder |= self.delegate_to( 46 | UpdateExtractor, segment, AnalyzerContext(cte=holder.cte) 47 | ) 48 | elif segment.type == "common_table_expression": 49 | alias = None 50 | sub_segments = list_child_segments(segment) 51 | for sub_segment in sub_segments: 52 | if sub_segment.type == "identifier": 53 | alias = sub_segment.raw 54 | elif sub_segment.type == "bracketed": 55 | for sq in self.list_subquery(sub_segment): 56 | sq.alias = alias 57 | subqueries.append(sq) 58 | holder.add_cte(SqlFluffSubQuery.of(sub_segment, alias)) 59 | 60 | self.extract_subquery(subqueries, holder) 61 | 62 | return holder 63 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlfluff/extractors/drop.py: -------------------------------------------------------------------------------- 1 | from sqlfluff.core.parser import BaseSegment 2 | 3 | from sqllineage.core.holders import StatementLineageHolder 4 | from sqllineage.core.parser.sqlfluff.extractors.base import BaseExtractor 5 | from sqllineage.core.parser.sqlfluff.utils import list_child_segments 6 | from sqllineage.utils.entities import AnalyzerContext 7 | 8 | 9 | class DropExtractor(BaseExtractor): 10 | """ 11 | Drop statement lineage extractor 12 | """ 13 | 14 | SUPPORTED_STMT_TYPES = ["drop_table_statement", "drop_view_statement"] 15 | 16 | def extract( 17 | self, 18 | statement: BaseSegment, 19 | context: AnalyzerContext, 20 | ) -> StatementLineageHolder: 21 | holder = StatementLineageHolder() 22 | drop_flag = False 23 | for segment in list_child_segments(statement): 24 | if ( 25 | segment.type == "keyword" 26 | and segment.raw_upper in ["TABLE", "VIEW"] 27 | or (drop_flag is True and segment.raw_upper in ["IF", "EXISTS"]) 28 | ): 29 | drop_flag = True 30 | continue 31 | if drop_flag: 32 | if table := self.find_table(segment): 33 | holder.add_drop(table) 34 | drop_flag = False 35 | return holder 36 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlfluff/extractors/noop.py: -------------------------------------------------------------------------------- 1 | from sqlfluff.core.parser import BaseSegment 2 | 3 | from sqllineage.core.holders import StatementLineageHolder 4 | from sqllineage.core.parser.sqlfluff.extractors.base import BaseExtractor 5 | from sqllineage.utils.entities import AnalyzerContext 6 | 7 | 8 | class NoopExtractor(BaseExtractor): 9 | """ 10 | Extractor for queries which do not provide any lineage 11 | """ 12 | 13 | SUPPORTED_STMT_TYPES = [ 14 | "delete_statement", 15 | "truncate_table", 16 | "refresh_statement", 17 | "cache_table", 18 | "uncache_table", 19 | "show_statement", 20 | "describe_statement", 21 | "use_statement", 22 | "declare_segment", 23 | "analyze_statement", 24 | "add_jar_statement", 25 | "create_function_statement", 26 | "drop_function_statement", 27 | "set_statement", 28 | ] 29 | 30 | def extract( 31 | self, 32 | statement: BaseSegment, 33 | context: AnalyzerContext, 34 | ) -> StatementLineageHolder: 35 | return StatementLineageHolder() 36 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlfluff/extractors/rename.py: -------------------------------------------------------------------------------- 1 | from sqlfluff.core.parser import BaseSegment 
2 | 3 | from sqllineage.core.holders import StatementLineageHolder 4 | from sqllineage.core.parser.sqlfluff.extractors.base import BaseExtractor 5 | from sqllineage.utils.entities import AnalyzerContext 6 | 7 | 8 | class RenameExtractor(BaseExtractor): 9 | """ 10 | Rename statement lineage extractor 11 | """ 12 | 13 | SUPPORTED_STMT_TYPES = [ 14 | "alter_table_statement", 15 | "rename_statement", 16 | "rename_table_statement", 17 | ] 18 | 19 | def extract( 20 | self, 21 | statement: BaseSegment, 22 | context: AnalyzerContext, 23 | ) -> StatementLineageHolder: 24 | holder = StatementLineageHolder() 25 | tables = [] 26 | for t in statement.segments: 27 | if table := self.find_table(t): 28 | tables.append(table) 29 | keywords = [t for t in statement.segments if t.type == "keyword"] 30 | if any(k.raw_upper == "RENAME" for k in keywords) and len(tables) % 2 == 0: 31 | for i in range(0, len(tables), 2): 32 | holder.add_rename(tables[i], tables[i + 1]) 33 | elif ( 34 | any(k.raw_upper in ["EXCHANGE", "SWAP"] for k in keywords) 35 | and len(tables) == 2 36 | ): 37 | # ALTER TABLE EXCHANGE PARTITION/SWAP 38 | holder.add_write(tables[0]) 39 | holder.add_read(tables[1]) 40 | return holder 41 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlfluff/extractors/unload.py: -------------------------------------------------------------------------------- 1 | from sqlfluff.core.parser import BaseSegment 2 | 3 | from sqllineage.core.holders import StatementLineageHolder 4 | from sqllineage.core.models import Path 5 | from sqllineage.core.parser.sqlfluff.extractors.base import BaseExtractor 6 | from sqllineage.utils.entities import AnalyzerContext 7 | 8 | 9 | class UnloadExtractor(BaseExtractor): 10 | """ 11 | Unload statement lineage extractor 12 | """ 13 | 14 | SUPPORTED_STMT_TYPES = [ 15 | "unload_statement", 16 | ] 17 | 18 | def extract( 19 | self, statement: BaseSegment, context: AnalyzerContext 20 | ) -> StatementLineageHolder: 21 | holder = StatementLineageHolder() 22 | if bracketed := statement.get_child("bracketed"): 23 | if quoted_literal := bracketed.get_child("quoted_literal"): 24 | raw_sql = quoted_literal.raw.strip("'").strip('"') 25 | holder |= self._analyze_inner_sql(raw_sql) 26 | if quoted_literal := statement.get_child("quoted_literal"): 27 | raw_path = quoted_literal.raw.strip("'").strip('"') 28 | holder.add_write(Path(raw_path)) 29 | return holder 30 | 31 | def _analyze_inner_sql(self, sql: str) -> StatementLineageHolder: 32 | from sqllineage.core.parser.sqlfluff.analyzer import SqlFluffLineageAnalyzer 33 | 34 | analyzer = SqlFluffLineageAnalyzer("", self.dialect) 35 | return analyzer.analyze(sql, self.metadata_provider) 36 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlparse/__init__.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from sqlparse import tokens 4 | from sqlparse.keywords import KEYWORDS, SQL_REGEX 5 | 6 | 7 | def _patch_adding_builtin_type() -> None: 8 | KEYWORDS["STRING"] = tokens.Name.Builtin 9 | KEYWORDS["DATETIME"] = tokens.Name.Builtin 10 | 11 | 12 | def _patch_updating_lateral_view_lexeme() -> None: 13 | for i, (regex, lexeme) in enumerate(SQL_REGEX): 14 | rgx = re.compile(regex, re.IGNORECASE | re.UNICODE).match 15 | if rgx("LATERAL VIEW EXPLODE(col)"): 16 | new_regex = r"(LATERAL\s+VIEW\s+)(OUTER\s+)?(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK|JSON_TUPLE)\b" 17 | SQL_REGEX[i] = 
(new_regex, lexeme) 18 | break 19 | 20 | 21 | def _monkey_patch() -> None: 22 | _patch_adding_builtin_type() 23 | _patch_updating_lateral_view_lexeme() 24 | 25 | 26 | _monkey_patch() 27 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlparse/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | import pkgutil 4 | 5 | # Later we'll use BaseHandler's __subclasses__ hook to call each subclass, for that to work, we'll need to make sure 6 | # each module the subclass in is imported before calling the hook 7 | for module in pkgutil.iter_modules([os.path.dirname(__file__)]): 8 | importlib.import_module(__name__ + "." + module.name) 9 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlparse/handlers/base.py: -------------------------------------------------------------------------------- 1 | from sqlparse.sql import Token 2 | 3 | from sqllineage.core.holders import SubQueryLineageHolder 4 | 5 | 6 | class NextTokenBaseHandler: 7 | """ 8 | This is to address an extract pattern when a specified token indicates we should extract something from next token. 9 | """ 10 | 11 | def __init__(self) -> None: 12 | self.indicator = False 13 | 14 | def _indicate(self, token: Token) -> bool: 15 | """ 16 | Whether current token indicates a following token to be handled or not. 17 | """ 18 | raise NotImplementedError 19 | 20 | def _handle(self, token: Token, holder: SubQueryLineageHolder) -> None: 21 | """ 22 | Handle the indicated token, and update the lineage result accordingly 23 | """ 24 | raise NotImplementedError 25 | 26 | def indicate(self, token: Token): 27 | """ 28 | Set indicator to True only when _indicate returns True 29 | """ 30 | self.indicator = self._indicate(token) 31 | 32 | def handle(self, token: Token, holder: SubQueryLineageHolder): 33 | """ 34 | Handle and set indicator back to False 35 | """ 36 | if self.indicator: 37 | self._handle(token, holder) 38 | self.indicator = False 39 | 40 | def end_of_query_cleanup(self, holder: SubQueryLineageHolder) -> None: 41 | """ 42 | Optional hook to be called at the end of statement or subquery 43 | """ 44 | pass 45 | 46 | 47 | class CurrentTokenBaseHandler: 48 | """ 49 | This is to address an extract pattern when we should extract something from current token 50 | """ 51 | 52 | def handle(self, token: Token, holder: SubQueryLineageHolder) -> None: 53 | raise NotImplementedError 54 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlparse/handlers/cte.py: -------------------------------------------------------------------------------- 1 | from sqlparse.sql import Function, Identifier, IdentifierList, Token 2 | 3 | from sqllineage.core.holders import SubQueryLineageHolder 4 | from sqllineage.core.parser.sqlparse.handlers.base import NextTokenBaseHandler 5 | from sqllineage.core.parser.sqlparse.models import SqlParseSubQuery 6 | 7 | 8 | class CTEHandler(NextTokenBaseHandler): 9 | """Common Table Expression (With Queries) Handler.""" 10 | 11 | CTE_TOKENS = ("WITH",) 12 | 13 | def _indicate(self, token: Token) -> bool: 14 | return token.normalized in self.CTE_TOKENS 15 | 16 | def _handle(self, token: Token, holder: SubQueryLineageHolder) -> None: 17 | # when CTE used without AS, it will be parsed as Function. 
This syntax is valid in SparkSQL 18 | cte_token_types = (Identifier, Function) 19 | if isinstance(token, cte_token_types): 20 | cte = [token] 21 | elif isinstance(token, IdentifierList): 22 | cte = [ 23 | token for token in token.tokens if isinstance(token, cte_token_types) 24 | ] 25 | else: 26 | # CREATE TABLE tbl1 (col1 VARCHAR) WITH (bucketed_by = ARRAY['col1'], bucket_count = 256). 27 | # This syntax is valid for bucketing in Trino and not the CTE, token will be Parenthesis here 28 | cte = [] 29 | for token in cte: 30 | sublist = list(token.get_sublists()) 31 | if sublist and not (isinstance(sublist[0], Function)): 32 | # CTE: tbl AS (SELECT 1), tbl is alias and (SELECT 1) is subquery Parenthesis 33 | holder.add_cte(SqlParseSubQuery.of(sublist[0], token.get_real_name())) 34 | -------------------------------------------------------------------------------- /sqllineage/core/parser/sqlparse/handlers/swap_partition.py: -------------------------------------------------------------------------------- 1 | from sqlparse.sql import Function, Token 2 | 3 | from sqllineage.core.holders import SubQueryLineageHolder 4 | from sqllineage.core.parser.sqlparse.handlers.base import CurrentTokenBaseHandler 5 | from sqllineage.core.parser.sqlparse.models import SqlParseTable 6 | from sqllineage.utils.helpers import escape_identifier_name 7 | 8 | 9 | class SwapPartitionHandler(CurrentTokenBaseHandler): 10 | """ 11 | a special handling for swap_partitions_between_tables function of Vertica SQL dialect. 12 | """ 13 | 14 | def handle(self, token: Token, holder: SubQueryLineageHolder) -> None: 15 | if ( 16 | isinstance(token, Function) 17 | and token.get_name().lower() == "swap_partitions_between_tables" 18 | ): 19 | _, parenthesis = token.tokens 20 | _, identifier_list, _ = parenthesis.tokens 21 | identifiers = list(identifier_list.get_identifiers()) 22 | holder.add_read( 23 | SqlParseTable(escape_identifier_name(identifiers[0].normalized)) 24 | ) 25 | holder.add_write( 26 | SqlParseTable(escape_identifier_name(identifiers[3].normalized)) 27 | ) 28 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query01.sql: -------------------------------------------------------------------------------- 1 | with customer_total_return as 2 | (select sr_customer_sk as ctr_customer_sk 3 | , sr_store_sk as ctr_store_sk 4 | , sum(SR_RETURN_AMT) as ctr_total_return 5 | from store_returns 6 | , date_dim 7 | where sr_returned_date_sk = d_date_sk 8 | and d_year = 2000 9 | group by sr_customer_sk 10 | , sr_store_sk) 11 | insert into query01 12 | select c_customer_id 13 | from customer_total_return ctr1 14 | , store 15 | , customer 16 | where ctr1.ctr_total_return > (select avg(ctr_total_return) * 1.2 17 | from customer_total_return ctr2 18 | where ctr1.ctr_store_sk = ctr2.ctr_store_sk) 19 | and s_store_sk = ctr1.ctr_store_sk 20 | and s_state = 'TN' 21 | and ctr1.ctr_customer_sk = c_customer_sk 22 | order by c_customer_id 23 | limit 100; 24 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query02.sql: -------------------------------------------------------------------------------- 1 | with wscs as 2 | (select sold_date_sk 3 | , sales_price 4 | from (select ws_sold_date_sk sold_date_sk 5 | , ws_ext_sales_price sales_price 6 | from web_sales 7 | union all 8 | select cs_sold_date_sk sold_date_sk 9 | , cs_ext_sales_price sales_price 10 | from catalog_sales) x), 11 | wswscs as 12 | (select d_week_seq, 13 | sum(case when 
(d_day_name = 'Sunday') then sales_price else null end) sun_sales, 14 | sum(case when (d_day_name = 'Monday') then sales_price else null end) mon_sales, 15 | sum(case when (d_day_name = 'Tuesday') then sales_price else null end) tue_sales, 16 | sum(case when (d_day_name = 'Wednesday') then sales_price else null end) wed_sales, 17 | sum(case when (d_day_name = 'Thursday') then sales_price else null end) thu_sales, 18 | sum(case when (d_day_name = 'Friday') then sales_price else null end) fri_sales, 19 | sum(case when (d_day_name = 'Saturday') then sales_price else null end) sat_sales 20 | from wscs 21 | , date_dim 22 | where d_date_sk = sold_date_sk 23 | group by d_week_seq) 24 | insert into query02 25 | select d_week_seq1 26 | , round(sun_sales1 / sun_sales2, 2) 27 | , round(mon_sales1 / mon_sales2, 2) 28 | , round(tue_sales1 / tue_sales2, 2) 29 | , round(wed_sales1 / wed_sales2, 2) 30 | , round(thu_sales1 / thu_sales2, 2) 31 | , round(fri_sales1 / fri_sales2, 2) 32 | , round(sat_sales1 / sat_sales2, 2) 33 | from (select wswscs.d_week_seq d_week_seq1 34 | , sun_sales sun_sales1 35 | , mon_sales mon_sales1 36 | , tue_sales tue_sales1 37 | , wed_sales wed_sales1 38 | , thu_sales thu_sales1 39 | , fri_sales fri_sales1 40 | , sat_sales sat_sales1 41 | from wswscs, 42 | date_dim 43 | where date_dim.d_week_seq = wswscs.d_week_seq 44 | and d_year = 2001) y, 45 | (select wswscs.d_week_seq d_week_seq2 46 | , sun_sales sun_sales2 47 | , mon_sales mon_sales2 48 | , tue_sales tue_sales2 49 | , wed_sales wed_sales2 50 | , thu_sales thu_sales2 51 | , fri_sales fri_sales2 52 | , sat_sales sat_sales2 53 | from wswscs 54 | , date_dim 55 | where date_dim.d_week_seq = wswscs.d_week_seq 56 | and d_year = 2001 + 1) z 57 | where d_week_seq1 = d_week_seq2 - 53 58 | order by d_week_seq1; 59 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query03.sql: -------------------------------------------------------------------------------- 1 | insert into query03 2 | select dt.d_year 3 | , item.i_brand_id brand_id 4 | , item.i_brand brand 5 | , sum(ss_ext_sales_price) sum_agg 6 | from date_dim dt 7 | , store_sales 8 | , item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manufact_id = 128 12 | and dt.d_moy = 11 13 | group by dt.d_year 14 | , item.i_brand 15 | , item.i_brand_id 16 | order by dt.d_year 17 | , sum_agg desc 18 | , brand_id 19 | limit 100; 20 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query06.sql: -------------------------------------------------------------------------------- 1 | insert into query06 2 | select a.ca_state state, count(*) cnt 3 | from customer_address a 4 | , customer c 5 | , store_sales s 6 | , date_dim d 7 | , item i 8 | where a.ca_address_sk = c.c_current_addr_sk 9 | and c.c_customer_sk = s.ss_customer_sk 10 | and s.ss_sold_date_sk = d.d_date_sk 11 | and s.ss_item_sk = i.i_item_sk 12 | and d.d_month_seq = 13 | (select distinct (d_month_seq) 14 | from date_dim 15 | where d_year = 2001 16 | and d_moy = 1) 17 | and i.i_current_price > 1.2 * 18 | (select avg(j.i_current_price) 19 | from item j 20 | where j.i_category = i.i_category) 21 | group by a.ca_state 22 | having count(*) >= 10 23 | order by cnt 24 | limit 100; 25 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query07.sql: 
-------------------------------------------------------------------------------- 1 | insert into query07 2 | select i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, 8 | customer_demographics, 9 | date_dim, 10 | item, 11 | promotion 12 | where ss_sold_date_sk = d_date_sk 13 | and ss_item_sk = i_item_sk 14 | and ss_cdemo_sk = cd_demo_sk 15 | and ss_promo_sk = p_promo_sk 16 | and cd_gender = 'M' 17 | and cd_marital_status = 'S' 18 | and cd_education_status = 'College' 19 | and (p_channel_email = 'N' or p_channel_event = 'N') 20 | and d_year = 2000 21 | group by i_item_id 22 | order by i_item_id 23 | limit 100; 24 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query09.sql: -------------------------------------------------------------------------------- 1 | insert into query09 2 | select case 3 | when (select count(*) 4 | from store_sales 5 | where ss_quantity between 1 and 20) > 74129 6 | then (select avg(ss_ext_discount_amt) 7 | from store_sales 8 | where ss_quantity between 1 and 20) 9 | else (select avg(ss_net_paid) 10 | from store_sales 11 | where ss_quantity between 1 and 20) end bucket1, 12 | case 13 | when (select count(*) 14 | from store_sales 15 | where ss_quantity between 21 and 40) > 122840 16 | then (select avg(ss_ext_discount_amt) 17 | from store_sales 18 | where ss_quantity between 21 and 40) 19 | else (select avg(ss_net_paid) 20 | from store_sales 21 | where ss_quantity between 21 and 40) end bucket2, 22 | case 23 | when (select count(*) 24 | from store_sales 25 | where ss_quantity between 41 and 60) > 56580 26 | then (select avg(ss_ext_discount_amt) 27 | from store_sales 28 | where ss_quantity between 41 and 60) 29 | else (select avg(ss_net_paid) 30 | from store_sales 31 | where ss_quantity between 41 and 60) end bucket3, 32 | case 33 | when (select count(*) 34 | from store_sales 35 | where ss_quantity between 61 and 80) > 10097 36 | then (select avg(ss_ext_discount_amt) 37 | from store_sales 38 | where ss_quantity between 61 and 80) 39 | else (select avg(ss_net_paid) 40 | from store_sales 41 | where ss_quantity between 61 and 80) end bucket4, 42 | case 43 | when (select count(*) 44 | from store_sales 45 | where ss_quantity between 81 and 100) > 165306 46 | then (select avg(ss_ext_discount_amt) 47 | from store_sales 48 | where ss_quantity between 81 and 100) 49 | else (select avg(ss_net_paid) 50 | from store_sales 51 | where ss_quantity between 81 and 100) end bucket5 52 | from reason 53 | where r_reason_sk = 1 54 | ; 55 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query10.sql: -------------------------------------------------------------------------------- 1 | insert into query10 2 | select cd_gender, 3 | cd_marital_status, 4 | cd_education_status, 5 | count(*) cnt1, 6 | cd_purchase_estimate, 7 | count(*) cnt2, 8 | cd_credit_rating, 9 | count(*) cnt3, 10 | cd_dep_count, 11 | count(*) cnt4, 12 | cd_dep_employed_count, 13 | count(*) cnt5, 14 | cd_dep_college_count, 15 | count(*) cnt6 16 | from customer c, 17 | customer_address ca, 18 | customer_demographics 19 | where c.c_current_addr_sk = ca.ca_address_sk 20 | and ca_county in ('Rush County', 'Toole County', 'Jefferson County', 'Dona Ana County', 'La Porte County') 21 | and cd_demo_sk = c.c_current_cdemo_sk 22 | and exists (select * 23 | from store_sales, 24 | date_dim 25 | where c.c_customer_sk = ss_customer_sk 
26 | and ss_sold_date_sk = d_date_sk 27 | and d_year = 2002 28 | and d_moy between 1 and 1 + 3) 29 | and (exists (select * 30 | from web_sales, 31 | date_dim 32 | where c.c_customer_sk = ws_bill_customer_sk 33 | and ws_sold_date_sk = d_date_sk 34 | and d_year = 2002 35 | and d_moy between 1 ANd 1 + 3) or 36 | exists (select * 37 | from catalog_sales, 38 | date_dim 39 | where c.c_customer_sk = cs_ship_customer_sk 40 | and cs_sold_date_sk = d_date_sk 41 | and d_year = 2002 42 | and d_moy between 1 and 1 + 3)) 43 | group by cd_gender, 44 | cd_marital_status, 45 | cd_education_status, 46 | cd_purchase_estimate, 47 | cd_credit_rating, 48 | cd_dep_count, 49 | cd_dep_employed_count, 50 | cd_dep_college_count 51 | order by cd_gender, 52 | cd_marital_status, 53 | cd_education_status, 54 | cd_purchase_estimate, 55 | cd_credit_rating, 56 | cd_dep_count, 57 | cd_dep_employed_count, 58 | cd_dep_college_count 59 | limit 100; 60 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query12.sql: -------------------------------------------------------------------------------- 1 | insert into query12 2 | select i_item_id 3 | , i_item_desc 4 | , i_category 5 | , i_class 6 | , i_current_price 7 | , sum(ws_ext_sales_price) as itemrevenue 8 | , sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from web_sales 11 | , item 12 | , date_dim 13 | where ws_item_sk = i_item_sk 14 | and i_category in ('Sports', 'Books', 'Home') 15 | and ws_sold_date_sk = d_date_sk 16 | and d_date between cast('1999-02-22' as date) 17 | and date_add(cast('1999-02-22' as date), 30) 18 | group by i_item_id 19 | , i_item_desc 20 | , i_category 21 | , i_class 22 | , i_current_price 23 | order by i_category 24 | , i_class 25 | , i_item_id 26 | , i_item_desc 27 | , revenueratio 28 | limit 100; 29 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query13.sql: -------------------------------------------------------------------------------- 1 | insert into query13 2 | select avg(ss_quantity) 3 | , avg(ss_ext_sales_price) 4 | , avg(ss_ext_wholesale_cost) 5 | , sum(ss_ext_wholesale_cost) 6 | from store_sales 7 | , store 8 | , customer_demographics 9 | , household_demographics 10 | , customer_address 11 | , date_dim 12 | where s_store_sk = ss_store_sk 13 | and ss_sold_date_sk = d_date_sk 14 | and d_year = 2001 15 | and ((ss_hdemo_sk = hd_demo_sk 16 | and cd_demo_sk = ss_cdemo_sk 17 | and cd_marital_status = 'M' 18 | and cd_education_status = 'Advanced Degree' 19 | and ss_sales_price between 100.00 and 150.00 20 | and hd_dep_count = 3 21 | ) or 22 | (ss_hdemo_sk = hd_demo_sk 23 | and cd_demo_sk = ss_cdemo_sk 24 | and cd_marital_status = 'S' 25 | and cd_education_status = 'College' 26 | and ss_sales_price between 50.00 and 100.00 27 | and hd_dep_count = 1 28 | ) or 29 | (ss_hdemo_sk = hd_demo_sk 30 | and cd_demo_sk = ss_cdemo_sk 31 | and cd_marital_status = 'W' 32 | and cd_education_status = '2 yr Degree' 33 | and ss_sales_price between 150.00 and 200.00 34 | and hd_dep_count = 1 35 | )) 36 | and ((ss_addr_sk = ca_address_sk 37 | and ca_country = 'United States' 38 | and ca_state in ('TX', 'OH', 'TX') 39 | and ss_net_profit between 100 and 200 40 | ) or 41 | (ss_addr_sk = ca_address_sk 42 | and ca_country = 'United States' 43 | and ca_state in ('OR', 'NM', 'KY') 44 | and ss_net_profit between 150 and 300 45 | ) or 46 | (ss_addr_sk = ca_address_sk 47 | and ca_country = 'United 
States' 48 | and ca_state in ('VA', 'TX', 'MS') 49 | and ss_net_profit between 50 and 250 50 | )) 51 | ; 52 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query15.sql: -------------------------------------------------------------------------------- 1 | insert into query15 2 | select ca_zip 3 | , sum(cs_sales_price) 4 | from catalog_sales 5 | , customer 6 | , customer_address 7 | , date_dim 8 | where cs_bill_customer_sk = c_customer_sk 9 | and c_current_addr_sk = ca_address_sk 10 | and (substr(ca_zip, 1, 5) in ('85669', '86197', '88274', '83405', '86475', 11 | '85392', '85460', '80348', '81792') 12 | or ca_state in ('CA', 'WA', 'GA') 13 | or cs_sales_price > 500) 14 | and cs_sold_date_sk = d_date_sk 15 | and d_qoy = 2 16 | and d_year = 2001 17 | group by ca_zip 18 | order by ca_zip 19 | limit 100; 20 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query16.sql: -------------------------------------------------------------------------------- 1 | insert into query16 2 | select count(distinct cs_order_number) as order_count 3 | , sum(cs_ext_ship_cost) as total_shipping_cost 4 | , sum(cs_net_profit) as total_net_profit 5 | from catalog_sales cs1 6 | , date_dim 7 | , customer_address 8 | , call_center 9 | where d_date between cast('2002-2-01' as date) and 10 | date_add(cast('2002-2-01' as date), 60) 11 | and cs1.cs_ship_date_sk = d_date_sk 12 | and cs1.cs_ship_addr_sk = ca_address_sk 13 | and ca_state = 'GA' 14 | and cs1.cs_call_center_sk = cc_call_center_sk 15 | and cc_county in ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 16 | 'Williamson County' 17 | ) 18 | and exists (select * 19 | from catalog_sales cs2 20 | where cs1.cs_order_number = cs2.cs_order_number 21 | and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) 22 | and not exists (select * 23 | from catalog_returns cr1 24 | where cs1.cs_order_number = cr1.cr_order_number) 25 | order by count(distinct cs_order_number) 26 | limit 100; 27 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query17.sql: -------------------------------------------------------------------------------- 1 | insert into query17 2 | select i_item_id 3 | , i_item_desc 4 | , s_state 5 | , count(ss_quantity) as store_sales_quantitycount 6 | , avg(ss_quantity) as store_sales_quantityave 7 | , stddev_samp(ss_quantity) as store_sales_quantitystdev 8 | , stddev_samp(ss_quantity) / avg(ss_quantity) as store_sales_quantitycov 9 | , count(sr_return_quantity) as store_returns_quantitycount 10 | , avg(sr_return_quantity) as store_returns_quantityave 11 | , stddev_samp(sr_return_quantity) as store_returns_quantitystdev 12 | , stddev_samp(sr_return_quantity) / avg(sr_return_quantity) as store_returns_quantitycov 13 | , count(cs_quantity) as catalog_sales_quantitycount 14 | , avg(cs_quantity) as catalog_sales_quantityave 15 | , stddev_samp(cs_quantity) as catalog_sales_quantitystdev 16 | , stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitycov 17 | from store_sales 18 | , store_returns 19 | , catalog_sales 20 | , date_dim d1 21 | , date_dim d2 22 | , date_dim d3 23 | , store 24 | , item 25 | where d1.d_quarter_name = '2001Q1' 26 | and d1.d_date_sk = ss_sold_date_sk 27 | and i_item_sk = ss_item_sk 28 | and s_store_sk = ss_store_sk 29 | and ss_customer_sk = sr_customer_sk 30 | and ss_item_sk = sr_item_sk 31 | and ss_ticket_number = sr_ticket_number 32 | and 
sr_returned_date_sk = d2.d_date_sk 33 | and d2.d_quarter_name in ('2001Q1', '2001Q2', '2001Q3') 34 | and sr_customer_sk = cs_bill_customer_sk 35 | and sr_item_sk = cs_item_sk 36 | and cs_sold_date_sk = d3.d_date_sk 37 | and d3.d_quarter_name in ('2001Q1', '2001Q2', '2001Q3') 38 | group by i_item_id 39 | , i_item_desc 40 | , s_state 41 | order by i_item_id 42 | , i_item_desc 43 | , s_state 44 | limit 100; 45 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query18.sql: -------------------------------------------------------------------------------- 1 | insert into query18 2 | select i_item_id, 3 | ca_country, 4 | ca_state, 5 | ca_county, 6 | avg( cast(cs_quantity as decimal(12,2))) agg1, 7 | avg( cast(cs_list_price as decimal(12,2))) agg2, 8 | avg( cast(cs_coupon_amt as decimal(12,2))) agg3, 9 | avg( cast(cs_sales_price as decimal(12,2))) agg4, 10 | avg( cast(cs_net_profit as decimal(12,2))) agg5, 11 | avg( cast(c_birth_year as decimal(12,2))) agg6, 12 | avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 13 | from catalog_sales, customer_demographics cd1, 14 | customer_demographics cd2, customer, customer_address, date_dim, item 15 | where cs_sold_date_sk = d_date_sk and 16 | cs_item_sk = i_item_sk and 17 | cs_bill_cdemo_sk = cd1.cd_demo_sk and 18 | cs_bill_customer_sk = c_customer_sk and 19 | cd1.cd_gender = 'F' and 20 | cd1.cd_education_status = 'Unknown' and 21 | c_current_cdemo_sk = cd2.cd_demo_sk and 22 | c_current_addr_sk = ca_address_sk and 23 | c_birth_month in (1,6,8,9,12,2) and 24 | d_year = 1998 and 25 | ca_state in ('MS','IN','ND' 26 | ,'OK','NM','VA','MS') 27 | group by rollup (i_item_id, ca_country, ca_state, ca_county) 28 | order by ca_country, 29 | ca_state, 30 | ca_county, 31 | i_item_id 32 | limit 100; 33 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query19.sql: -------------------------------------------------------------------------------- 1 | insert into query19 2 | select i_brand_id brand_id, 3 | i_brand brand, 4 | i_manufact_id, 5 | i_manufact, 6 | sum(ss_ext_sales_price) ext_price 7 | from date_dim, 8 | store_sales, 9 | item, 10 | customer, 11 | customer_address, 12 | store 13 | where d_date_sk = ss_sold_date_sk 14 | and ss_item_sk = i_item_sk 15 | and i_manager_id = 8 16 | and d_moy = 11 17 | and d_year = 1998 18 | and ss_customer_sk = c_customer_sk 19 | and c_current_addr_sk = ca_address_sk 20 | and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) 21 | and ss_store_sk = s_store_sk 22 | group by i_brand 23 | , i_brand_id 24 | , i_manufact_id 25 | , i_manufact 26 | order by ext_price desc 27 | , i_brand 28 | , i_brand_id 29 | , i_manufact_id 30 | , i_manufact 31 | limit 100; 32 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query20.sql: -------------------------------------------------------------------------------- 1 | insert into query20 2 | select i_item_id 3 | , i_item_desc 4 | , i_category 5 | , i_class 6 | , i_current_price 7 | , sum(cs_ext_sales_price) as itemrevenue 8 | , sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from catalog_sales 11 | , item 12 | , date_dim 13 | where cs_item_sk = i_item_sk 14 | and i_category in ('Sports', 'Books', 'Home') 15 | and cs_sold_date_sk = d_date_sk 16 | and d_date between cast('1999-02-22' as date) 17 | and date_add(cast('1999-02-22' as date), 30) 18 | group by i_item_id 19 | , i_item_desc 
20 | , i_category 21 | , i_class 22 | , i_current_price 23 | order by i_category 24 | , i_class 25 | , i_item_id 26 | , i_item_desc 27 | , revenueratio 28 | limit 100; 29 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query21.sql: -------------------------------------------------------------------------------- 1 | insert into query21 2 | select * 3 | from (select w_warehouse_name 4 | , i_item_id 5 | , sum(case 6 | when (cast(d_date as date) < cast('2000-03-11' as date)) 7 | then inv_quantity_on_hand 8 | else 0 end) as inv_before 9 | , sum(case 10 | when (cast(d_date as date) >= cast('2000-03-11' as date)) 11 | then inv_quantity_on_hand 12 | else 0 end) as inv_after 13 | from inventory 14 | , warehouse 15 | , item 16 | , date_dim 17 | where i_current_price between 0.99 and 1.49 18 | and i_item_sk = inv_item_sk 19 | and inv_warehouse_sk = w_warehouse_sk 20 | and inv_date_sk = d_date_sk 21 | and d_date between date_sub(cast('2000-03-11' as date), 30) 22 | and date_add(cast('2000-03-11' as date), 30) 23 | group by w_warehouse_name, i_item_id) x 24 | where (case 25 | when inv_before > 0 26 | then inv_after / inv_before 27 | else null 28 | end) between 2.0 / 3.0 and 3.0 / 2.0 29 | order by w_warehouse_name 30 | , i_item_id 31 | limit 100; 32 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query22.sql: -------------------------------------------------------------------------------- 1 | insert into query22 2 | select i_product_name 3 | , i_brand 4 | , i_class 5 | , i_category 6 | , avg(inv_quantity_on_hand) qoh 7 | from inventory 8 | , date_dim 9 | , item 10 | where inv_date_sk = d_date_sk 11 | and inv_item_sk = i_item_sk 12 | and d_month_seq between 1200 and 1200 + 11 13 | group by rollup (i_product_name 14 | , i_brand 15 | , i_class 16 | , i_category) 17 | order by qoh, i_product_name, i_brand, i_class, i_category 18 | limit 100; 19 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query25.sql: -------------------------------------------------------------------------------- 1 | insert into query25 2 | select i_item_id 3 | , i_item_desc 4 | , s_store_id 5 | , s_store_name 6 | , sum(ss_net_profit) as store_sales_profit 7 | , sum(sr_net_loss) as store_returns_loss 8 | , sum(cs_net_profit) as catalog_sales_profit 9 | from store_sales 10 | , store_returns 11 | , catalog_sales 12 | , date_dim d1 13 | , date_dim d2 14 | , date_dim d3 15 | , store 16 | , item 17 | where d1.d_moy = 4 18 | and d1.d_year = 2001 19 | and d1.d_date_sk = ss_sold_date_sk 20 | and i_item_sk = ss_item_sk 21 | and s_store_sk = ss_store_sk 22 | and ss_customer_sk = sr_customer_sk 23 | and ss_item_sk = sr_item_sk 24 | and ss_ticket_number = sr_ticket_number 25 | and sr_returned_date_sk = d2.d_date_sk 26 | and d2.d_moy between 4 and 10 27 | and d2.d_year = 2001 28 | and sr_customer_sk = cs_bill_customer_sk 29 | and sr_item_sk = cs_item_sk 30 | and cs_sold_date_sk = d3.d_date_sk 31 | and d3.d_moy between 4 and 10 32 | and d3.d_year = 2001 33 | group by i_item_id 34 | , i_item_desc 35 | , s_store_id 36 | , s_store_name 37 | order by i_item_id 38 | , i_item_desc 39 | , s_store_id 40 | , s_store_name 41 | limit 100; 42 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query26.sql: -------------------------------------------------------------------------------- 1 | insert into query26 2 | select i_item_id, 3 | 
avg(cs_quantity) agg1, 4 | avg(cs_list_price) agg2, 5 | avg(cs_coupon_amt) agg3, 6 | avg(cs_sales_price) agg4 7 | from catalog_sales, 8 | customer_demographics, 9 | date_dim, 10 | item, 11 | promotion 12 | where cs_sold_date_sk = d_date_sk 13 | and cs_item_sk = i_item_sk 14 | and cs_bill_cdemo_sk = cd_demo_sk 15 | and cs_promo_sk = p_promo_sk 16 | and cd_gender = 'M' 17 | and cd_marital_status = 'S' 18 | and cd_education_status = 'College' 19 | and (p_channel_email = 'N' or p_channel_event = 'N') 20 | and d_year = 2000 21 | group by i_item_id 22 | order by i_item_id 23 | limit 100; 24 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query27.sql: -------------------------------------------------------------------------------- 1 | insert into query27 2 | select i_item_id, 3 | s_state, 4 | grouping(s_state) g_state, 5 | avg(ss_quantity) agg1, 6 | avg(ss_list_price) agg2, 7 | avg(ss_coupon_amt) agg3, 8 | avg(ss_sales_price) agg4 9 | from store_sales, 10 | customer_demographics, 11 | date_dim, 12 | store, 13 | item 14 | where ss_sold_date_sk = d_date_sk 15 | and ss_item_sk = i_item_sk 16 | and ss_store_sk = s_store_sk 17 | and ss_cdemo_sk = cd_demo_sk 18 | and cd_gender = 'M' 19 | and cd_marital_status = 'S' 20 | and cd_education_status = 'College' 21 | and d_year = 2002 22 | and s_state in ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') 23 | group by rollup (i_item_id, s_state) 24 | order by i_item_id 25 | , s_state 26 | limit 100; 27 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query28.sql: -------------------------------------------------------------------------------- 1 | insert into query28 2 | select * 3 | from (select avg(ss_list_price) B1_LP 4 | , count(ss_list_price) B1_CNT 5 | , count(distinct ss_list_price) B1_CNTD 6 | from store_sales 7 | where ss_quantity between 0 and 5 8 | and (ss_list_price between 8 and 8 + 10 9 | or ss_coupon_amt between 459 and 459 + 1000 10 | or ss_wholesale_cost between 57 and 57 + 20)) B1, 11 | (select avg(ss_list_price) B2_LP 12 | , count(ss_list_price) B2_CNT 13 | , count(distinct ss_list_price) B2_CNTD 14 | from store_sales 15 | where ss_quantity between 6 and 10 16 | and (ss_list_price between 90 and 90 + 10 17 | or ss_coupon_amt between 2323 and 2323 + 1000 18 | or ss_wholesale_cost between 31 and 31 + 20)) B2, 19 | (select avg(ss_list_price) B3_LP 20 | , count(ss_list_price) B3_CNT 21 | , count(distinct ss_list_price) B3_CNTD 22 | from store_sales 23 | where ss_quantity between 11 and 15 24 | and (ss_list_price between 142 and 142 + 10 25 | or ss_coupon_amt between 12214 and 12214 + 1000 26 | or ss_wholesale_cost between 79 and 79 + 20)) B3, 27 | (select avg(ss_list_price) B4_LP 28 | , count(ss_list_price) B4_CNT 29 | , count(distinct ss_list_price) B4_CNTD 30 | from store_sales 31 | where ss_quantity between 16 and 20 32 | and (ss_list_price between 135 and 135 + 10 33 | or ss_coupon_amt between 6071 and 6071 + 1000 34 | or ss_wholesale_cost between 38 and 38 + 20)) B4, 35 | (select avg(ss_list_price) B5_LP 36 | , count(ss_list_price) B5_CNT 37 | , count(distinct ss_list_price) B5_CNTD 38 | from store_sales 39 | where ss_quantity between 21 and 25 40 | and (ss_list_price between 122 and 122 + 10 41 | or ss_coupon_amt between 836 and 836 + 1000 42 | or ss_wholesale_cost between 17 and 17 + 20)) B5, 43 | (select avg(ss_list_price) B6_LP 44 | , count(ss_list_price) B6_CNT 45 | , count(distinct ss_list_price) B6_CNTD 46 | from store_sales 47 | 
where ss_quantity between 26 and 30 48 | and (ss_list_price between 154 and 154 + 10 49 | or ss_coupon_amt between 7326 and 7326 + 1000 50 | or ss_wholesale_cost between 7 and 7 + 20)) B6 51 | limit 100; 52 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query29.sql: -------------------------------------------------------------------------------- 1 | insert into query29 2 | select i_item_id 3 | , i_item_desc 4 | , s_store_id 5 | , s_store_name 6 | , sum(ss_quantity) as store_sales_quantity 7 | , sum(sr_return_quantity) as store_returns_quantity 8 | , sum(cs_quantity) as catalog_sales_quantity 9 | from store_sales 10 | , store_returns 11 | , catalog_sales 12 | , date_dim d1 13 | , date_dim d2 14 | , date_dim d3 15 | , store 16 | , item 17 | where d1.d_moy = 9 18 | and d1.d_year = 1999 19 | and d1.d_date_sk = ss_sold_date_sk 20 | and i_item_sk = ss_item_sk 21 | and s_store_sk = ss_store_sk 22 | and ss_customer_sk = sr_customer_sk 23 | and ss_item_sk = sr_item_sk 24 | and ss_ticket_number = sr_ticket_number 25 | and sr_returned_date_sk = d2.d_date_sk 26 | and d2.d_moy between 9 and 9 + 3 27 | and d2.d_year = 1999 28 | and sr_customer_sk = cs_bill_customer_sk 29 | and sr_item_sk = cs_item_sk 30 | and cs_sold_date_sk = d3.d_date_sk 31 | and d3.d_year in (1999, 1999 + 1, 1999 + 2) 32 | group by i_item_id 33 | , i_item_desc 34 | , s_store_id 35 | , s_store_name 36 | order by i_item_id 37 | , i_item_desc 38 | , s_store_id 39 | , s_store_name 40 | limit 100; 41 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query30.sql: -------------------------------------------------------------------------------- 1 | with customer_total_return as 2 | (select wr_returning_customer_sk as ctr_customer_sk 3 | , 4 | ca_state as ctr_state, 5 | sum(wr_return_amt) as ctr_total_return 6 | from web_returns 7 | , date_dim 8 | , customer_address 9 | where wr_returned_date_sk = d_date_sk 10 | and d_year = 2002 11 | and wr_returning_addr_sk = ca_address_sk 12 | group by wr_returning_customer_sk 13 | , ca_state) 14 | insert into query30 15 | select c_customer_id 16 | , c_salutation 17 | , c_first_name 18 | , c_last_name 19 | , c_preferred_cust_flag 20 | , c_birth_day 21 | , c_birth_month 22 | , c_birth_year 23 | , c_birth_country 24 | , c_login 25 | , c_email_address 26 | , c_last_review_date 27 | , ctr_total_return 28 | from customer_total_return ctr1 29 | , customer_address 30 | , customer 31 | where ctr1.ctr_total_return > (select avg(ctr_total_return) * 1.2 32 | from customer_total_return ctr2 33 | where ctr1.ctr_state = ctr2.ctr_state) 34 | and ca_address_sk = c_current_addr_sk 35 | and ca_state = 'GA' 36 | and ctr1.ctr_customer_sk = c_customer_sk 37 | order by c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag 38 | , c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address 39 | , c_last_review_date, ctr_total_return 40 | limit 100; 41 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query31.sql: -------------------------------------------------------------------------------- 1 | with ss as 2 | (select ca_county, d_qoy, d_year, sum(ss_ext_sales_price) as store_sales 3 | from store_sales, 4 | date_dim, 5 | customer_address 6 | where ss_sold_date_sk = d_date_sk 7 | and ss_addr_sk = ca_address_sk 8 | group by ca_county, d_qoy, d_year), 9 | ws as 10 | (select ca_county, d_qoy, d_year, 
sum(ws_ext_sales_price) as web_sales 11 | from web_sales, 12 | date_dim, 13 | customer_address 14 | where ws_sold_date_sk = d_date_sk 15 | and ws_bill_addr_sk = ca_address_sk 16 | group by ca_county, d_qoy, d_year) 17 | insert into query31 18 | select ss1.ca_county 19 | , ss1.d_year 20 | , ws2.web_sales / ws1.web_sales web_q1_q2_increase 21 | , ss2.store_sales / ss1.store_sales store_q1_q2_increase 22 | , ws3.web_sales / ws2.web_sales web_q2_q3_increase 23 | , ss3.store_sales / ss2.store_sales store_q2_q3_increase 24 | from ss ss1 25 | , ss ss2 26 | , ss ss3 27 | , ws ws1 28 | , ws ws2 29 | , ws ws3 30 | where ss1.d_qoy = 1 31 | and ss1.d_year = 2000 32 | and ss1.ca_county = ss2.ca_county 33 | and ss2.d_qoy = 2 34 | and ss2.d_year = 2000 35 | and ss2.ca_county = ss3.ca_county 36 | and ss3.d_qoy = 3 37 | and ss3.d_year = 2000 38 | and ss1.ca_county = ws1.ca_county 39 | and ws1.d_qoy = 1 40 | and ws1.d_year = 2000 41 | and ws1.ca_county = ws2.ca_county 42 | and ws2.d_qoy = 2 43 | and ws2.d_year = 2000 44 | and ws1.ca_county = ws3.ca_county 45 | and ws3.d_qoy = 3 46 | and ws3.d_year = 2000 47 | and case when ws1.web_sales > 0 then ws2.web_sales / ws1.web_sales else null end 48 | > case when ss1.store_sales > 0 then ss2.store_sales / ss1.store_sales else null end 49 | and case when ws2.web_sales > 0 then ws3.web_sales / ws2.web_sales else null end 50 | > case when ss2.store_sales > 0 then ss3.store_sales / ss2.store_sales else null end 51 | order by ss1.ca_county; 52 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query32.sql: -------------------------------------------------------------------------------- 1 | insert into query32 2 | select sum(cs_ext_discount_amt) as excess_discount_amount 3 | from catalog_sales 4 | , item 5 | , date_dim 6 | where i_manufact_id = 977 7 | and i_item_sk = cs_item_sk 8 | and d_date between '2000-01-27' and 9 | date_add(cast('2000-01-27' as date), 90) 10 | and d_date_sk = cs_sold_date_sk 11 | and cs_ext_discount_amt 12 | > ( 13 | select 1.3 * avg(cs_ext_discount_amt) 14 | from catalog_sales 15 | , date_dim 16 | where cs_item_sk = i_item_sk 17 | and d_date between '2000-01-27' and 18 | date_add(cast('2000-01-27' as date), 90) 19 | and d_date_sk = cs_sold_date_sk 20 | ) 21 | limit 100; 22 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query33.sql: -------------------------------------------------------------------------------- 1 | with ss as ( 2 | select i_manufact_id, 3 | sum(ss_ext_sales_price) total_sales 4 | from store_sales, 5 | date_dim, 6 | customer_address, 7 | item 8 | where i_manufact_id in (select i_manufact_id 9 | from item 10 | where i_category in ('Electronics')) 11 | and ss_item_sk = i_item_sk 12 | and ss_sold_date_sk = d_date_sk 13 | and d_year = 1998 14 | and d_moy = 5 15 | and ss_addr_sk = ca_address_sk 16 | and ca_gmt_offset = -5 17 | group by i_manufact_id), 18 | cs as ( 19 | select i_manufact_id, 20 | sum(cs_ext_sales_price) total_sales 21 | from catalog_sales, 22 | date_dim, 23 | customer_address, 24 | item 25 | where i_manufact_id in (select i_manufact_id 26 | from item 27 | where i_category in ('Electronics')) 28 | and cs_item_sk = i_item_sk 29 | and cs_sold_date_sk = d_date_sk 30 | and d_year = 1998 31 | and d_moy = 5 32 | and cs_bill_addr_sk = ca_address_sk 33 | and ca_gmt_offset = -5 34 | group by i_manufact_id), 35 | ws as ( 36 | select i_manufact_id, 37 | sum(ws_ext_sales_price) total_sales 38 | from web_sales, 39 | 
date_dim, 40 | customer_address, 41 | item 42 | where i_manufact_id in (select i_manufact_id 43 | from item 44 | where i_category in ('Electronics')) 45 | and ws_item_sk = i_item_sk 46 | and ws_sold_date_sk = d_date_sk 47 | and d_year = 1998 48 | and d_moy = 5 49 | and ws_bill_addr_sk = ca_address_sk 50 | and ca_gmt_offset = -5 51 | group by i_manufact_id) 52 | insert into query33 53 | select i_manufact_id, sum(total_sales) total_sales 54 | from (select * 55 | from ss 56 | union all 57 | select * 58 | from cs 59 | union all 60 | select * 61 | from ws) tmp1 62 | group by i_manufact_id 63 | order by total_sales 64 | limit 100; 65 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query34.sql: -------------------------------------------------------------------------------- 1 | insert into query34 2 | select c_last_name 3 | , c_first_name 4 | , c_salutation 5 | , c_preferred_cust_flag 6 | , ss_ticket_number 7 | , cnt 8 | from (select ss_ticket_number 9 | , ss_customer_sk 10 | , count(*) cnt 11 | from store_sales, 12 | date_dim, 13 | store, 14 | household_demographics 15 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 16 | and store_sales.ss_store_sk = store.s_store_sk 17 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 18 | and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) 19 | and (household_demographics.hd_buy_potential = '>10000' or 20 | household_demographics.hd_buy_potential = 'Unknown') 21 | and household_demographics.hd_vehicle_count > 0 22 | and (case 23 | when household_demographics.hd_vehicle_count > 0 24 | then household_demographics.hd_dep_count / household_demographics.hd_vehicle_count 25 | else null 26 | end) > 1.2 27 | and date_dim.d_year in (1999, 1999 + 1, 1999 + 2) 28 | and store.s_county in ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 29 | 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') 30 | group by ss_ticket_number, ss_customer_sk) dn, 31 | customer 32 | where ss_customer_sk = c_customer_sk 33 | and cnt between 15 and 20 34 | order by c_last_name, c_first_name, c_salutation, c_preferred_cust_flag desc, ss_ticket_number; 35 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query35.sql: -------------------------------------------------------------------------------- 1 | insert into query35 2 | select ca_state, 3 | cd_gender, 4 | cd_marital_status, 5 | cd_dep_count, 6 | count(*) cnt1, 7 | min(cd_dep_count), 8 | max(cd_dep_count), 9 | avg(cd_dep_count), 10 | cd_dep_employed_count, 11 | count(*) cnt2, 12 | min(cd_dep_employed_count), 13 | max(cd_dep_employed_count), 14 | avg(cd_dep_employed_count), 15 | cd_dep_college_count, 16 | count(*) cnt3, 17 | min(cd_dep_college_count), 18 | max(cd_dep_college_count), 19 | avg(cd_dep_college_count) 20 | from customer c, 21 | customer_address ca, 22 | customer_demographics 23 | where c.c_current_addr_sk = ca.ca_address_sk 24 | and cd_demo_sk = c.c_current_cdemo_sk 25 | and exists (select * 26 | from store_sales, 27 | date_dim 28 | where c.c_customer_sk = ss_customer_sk 29 | and ss_sold_date_sk = d_date_sk 30 | and d_year = 2002 31 | and d_qoy < 4) 32 | and (exists (select * 33 | from web_sales, 34 | date_dim 35 | where c.c_customer_sk = ws_bill_customer_sk 36 | and ws_sold_date_sk = d_date_sk 37 | and d_year = 2002 38 | and d_qoy < 4) or 39 | exists (select * 40 | from catalog_sales, 41 | date_dim 42 | where 
c.c_customer_sk = cs_ship_customer_sk 43 | and cs_sold_date_sk = d_date_sk 44 | and d_year = 2002 45 | and d_qoy < 4)) 46 | group by ca_state, 47 | cd_gender, 48 | cd_marital_status, 49 | cd_dep_count, 50 | cd_dep_employed_count, 51 | cd_dep_college_count 52 | order by ca_state, 53 | cd_gender, 54 | cd_marital_status, 55 | cd_dep_count, 56 | cd_dep_employed_count, 57 | cd_dep_college_count 58 | limit 100; 59 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query36.sql: -------------------------------------------------------------------------------- 1 | insert into query36 2 | select sum(ss_net_profit) / sum(ss_ext_sales_price) as gross_margin 3 | , i_category 4 | , i_class 5 | , grouping(i_category) + grouping(i_class) as lochierarchy 6 | , rank() over ( 7 | partition by grouping(i_category) + grouping(i_class), 8 | case when grouping(i_class) = 0 then i_category end 9 | order by sum(ss_net_profit) / sum(ss_ext_sales_price) asc) as rank_within_parent 10 | from store_sales 11 | , date_dim d1 12 | , item 13 | , store 14 | where d1.d_year = 2001 15 | and d1.d_date_sk = ss_sold_date_sk 16 | and i_item_sk = ss_item_sk 17 | and s_store_sk = ss_store_sk 18 | and s_state in ('TN', 'TN', 'TN', 'TN', 19 | 'TN', 'TN', 'TN', 'TN') 20 | group by rollup (i_category, i_class) 21 | order by lochierarchy desc 22 | , case when lochierarchy = 0 then i_category end 23 | , rank_within_parent 24 | limit 100; 25 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query37.sql: -------------------------------------------------------------------------------- 1 | insert into query37 2 | select i_item_id 3 | , i_item_desc 4 | , i_current_price 5 | from item, 6 | inventory, 7 | date_dim, 8 | catalog_sales 9 | where i_current_price between 68 and 68 + 30 10 | and inv_item_sk = i_item_sk 11 | and d_date_sk = inv_date_sk 12 | and d_date between cast('2000-02-01' as date) and date_add(cast('2000-02-01' as date), 60) 13 | and i_manufact_id in (677, 940, 694, 808) 14 | and inv_quantity_on_hand between 100 and 500 15 | and cs_item_sk = i_item_sk 16 | group by i_item_id, i_item_desc, i_current_price 17 | order by i_item_id 18 | limit 100; 19 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query38.sql: -------------------------------------------------------------------------------- 1 | insert into query38 2 | select count(*) 3 | from ( 4 | select distinct c_last_name, c_first_name, d_date 5 | from store_sales, 6 | date_dim, 7 | customer 8 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 9 | and store_sales.ss_customer_sk = customer.c_customer_sk 10 | and d_month_seq between 1200 and 1200 + 11 11 | intersect 12 | select distinct c_last_name, c_first_name, d_date 13 | from catalog_sales, 14 | date_dim, 15 | customer 16 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 17 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 18 | and d_month_seq between 1200 and 1200 + 11 19 | intersect 20 | select distinct c_last_name, c_first_name, d_date 21 | from web_sales, 22 | date_dim, 23 | customer 24 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 25 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 26 | and d_month_seq between 1200 and 1200 + 11 27 | ) hot_cust 28 | limit 100; 29 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query40.sql: 
-------------------------------------------------------------------------------- 1 | insert into query40 2 | select w_state 3 | , i_item_id 4 | , sum(case 5 | when (cast(d_date as date) < cast('2000-03-11' as date)) 6 | then cs_sales_price - coalesce(cr_refunded_cash, 0) 7 | else 0 end) as sales_before 8 | , sum(case 9 | when (cast(d_date as date) >= cast('2000-03-11' as date)) 10 | then cs_sales_price - coalesce(cr_refunded_cash, 0) 11 | else 0 end) as sales_after 12 | from catalog_sales 13 | left outer join catalog_returns on 14 | (cs_order_number = cr_order_number 15 | and cs_item_sk = cr_item_sk) 16 | , warehouse 17 | , item 18 | , date_dim 19 | where i_current_price between 0.99 and 1.49 20 | and i_item_sk = cs_item_sk 21 | and cs_warehouse_sk = w_warehouse_sk 22 | and cs_sold_date_sk = d_date_sk 23 | and d_date between date_sub(cast('2000-03-11' as date), 30) 24 | and date_add(cast('2000-03-11' as date), 30) 25 | group by w_state, i_item_id 26 | order by w_state, i_item_id 27 | limit 100; 28 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query41.sql: -------------------------------------------------------------------------------- 1 | insert into query41 2 | select distinct(i_product_name) 3 | from item i1 4 | where i_manufact_id between 738 and 738 + 40 5 | and (select count(*) as item_cnt 6 | from item 7 | where (i_manufact = i1.i_manufact and 8 | ((i_category = 'Women' and 9 | (i_color = 'powder' or i_color = 'khaki') and 10 | (i_units = 'Ounce' or i_units = 'Oz') and 11 | (i_size = 'medium' or i_size = 'extra large') 12 | ) or 13 | (i_category = 'Women' and 14 | (i_color = 'brown' or i_color = 'honeydew') and 15 | (i_units = 'Bunch' or i_units = 'Ton') and 16 | (i_size = 'N/A' or i_size = 'small') 17 | ) or 18 | (i_category = 'Men' and 19 | (i_color = 'floral' or i_color = 'deep') and 20 | (i_units = 'N/A' or i_units = 'Dozen') and 21 | (i_size = 'petite' or i_size = 'large') 22 | ) or 23 | (i_category = 'Men' and 24 | (i_color = 'light' or i_color = 'cornflower') and 25 | (i_units = 'Box' or i_units = 'Pound') and 26 | (i_size = 'medium' or i_size = 'extra large') 27 | ))) 28 | or (i_manufact = i1.i_manufact and 29 | ((i_category = 'Women' and 30 | (i_color = 'midnight' or i_color = 'snow') and 31 | (i_units = 'Pallet' or i_units = 'Gross') and 32 | (i_size = 'medium' or i_size = 'extra large') 33 | ) or 34 | (i_category = 'Women' and 35 | (i_color = 'cyan' or i_color = 'papaya') and 36 | (i_units = 'Cup' or i_units = 'Dram') and 37 | (i_size = 'N/A' or i_size = 'small') 38 | ) or 39 | (i_category = 'Men' and 40 | (i_color = 'orange' or i_color = 'frosted') and 41 | (i_units = 'Each' or i_units = 'Tbl') and 42 | (i_size = 'petite' or i_size = 'large') 43 | ) or 44 | (i_category = 'Men' and 45 | (i_color = 'forest' or i_color = 'ghost') and 46 | (i_units = 'Lb' or i_units = 'Bundle') and 47 | (i_size = 'medium' or i_size = 'extra large') 48 | )))) > 0 49 | order by i_product_name 50 | limit 100; 51 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query42.sql: -------------------------------------------------------------------------------- 1 | insert into query42 2 | select dt.d_year 3 | , item.i_category_id 4 | , item.i_category 5 | , sum(ss_ext_sales_price) 6 | from date_dim dt 7 | , store_sales 8 | , item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy = 11 13 | 
and dt.d_year = 2000 14 | group by dt.d_year 15 | , item.i_category_id 16 | , item.i_category 17 | order by sum(ss_ext_sales_price) desc, dt.d_year 18 | , item.i_category_id 19 | , item.i_category 20 | limit 100; 21 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query43.sql: -------------------------------------------------------------------------------- 1 | insert into query43 2 | select s_store_name, 3 | s_store_id, 4 | sum(case when (d_day_name = 'Sunday') then ss_sales_price else null end) sun_sales, 5 | sum(case when (d_day_name = 'Monday') then ss_sales_price else null end) mon_sales, 6 | sum(case when (d_day_name = 'Tuesday') then ss_sales_price else null end) tue_sales, 7 | sum(case when (d_day_name = 'Wednesday') then ss_sales_price else null end) wed_sales, 8 | sum(case when (d_day_name = 'Thursday') then ss_sales_price else null end) thu_sales, 9 | sum(case when (d_day_name = 'Friday') then ss_sales_price else null end) fri_sales, 10 | sum(case when (d_day_name = 'Saturday') then ss_sales_price else null end) sat_sales 11 | from date_dim, 12 | store_sales, 13 | store 14 | where d_date_sk = ss_sold_date_sk 15 | and s_store_sk = ss_store_sk 16 | and s_gmt_offset = -5 17 | and d_year = 2000 18 | group by s_store_name, s_store_id 19 | order by s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, thu_sales, fri_sales, sat_sales 20 | limit 100; 21 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query44.sql: -------------------------------------------------------------------------------- 1 | insert into query44 2 | select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing 3 | from (select * 4 | from (select item_sk, rank() over (order by rank_col asc) rnk 5 | from (select ss_item_sk item_sk, avg(ss_net_profit) rank_col 6 | from store_sales ss1 7 | where ss_store_sk = 4 8 | group by ss_item_sk 9 | having avg(ss_net_profit) > 0.9 * (select avg(ss_net_profit) rank_col 10 | from store_sales 11 | where ss_store_sk = 4 12 | and ss_addr_sk is null 13 | group by ss_store_sk)) V1) V11 14 | where rnk < 11) asceding, 15 | (select * 16 | from (select item_sk, rank() over (order by rank_col desc) rnk 17 | from (select ss_item_sk item_sk, avg(ss_net_profit) rank_col 18 | from store_sales ss1 19 | where ss_store_sk = 4 20 | group by ss_item_sk 21 | having avg(ss_net_profit) > 0.9 * (select avg(ss_net_profit) rank_col 22 | from store_sales 23 | where ss_store_sk = 4 24 | and ss_addr_sk is null 25 | group by ss_store_sk)) V2) V21 26 | where rnk < 11) descending, 27 | item i1, 28 | item i2 29 | where asceding.rnk = descending.rnk 30 | and i1.i_item_sk = asceding.item_sk 31 | and i2.i_item_sk = descending.item_sk 32 | order by asceding.rnk 33 | limit 100; 34 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query45.sql: -------------------------------------------------------------------------------- 1 | insert into query45 2 | select ca_zip, ca_city, sum(ws_sales_price) 3 | from web_sales, 4 | customer, 5 | customer_address, 6 | date_dim, 7 | item 8 | where ws_bill_customer_sk = c_customer_sk 9 | and c_current_addr_sk = ca_address_sk 10 | and ws_item_sk = i_item_sk 11 | and (substr(ca_zip, 1, 5) in ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') 12 | or 13 | i_item_id in (select i_item_id 14 | from item 15 | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 
19, 23, 29) 16 | ) 17 | ) 18 | and ws_sold_date_sk = d_date_sk 19 | and d_qoy = 2 20 | and d_year = 2001 21 | group by ca_zip, ca_city 22 | order by ca_zip, ca_city 23 | limit 100; 24 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query46.sql: -------------------------------------------------------------------------------- 1 | insert into query46 2 | select c_last_name 3 | , c_first_name 4 | , ca_city 5 | , bought_city 6 | , ss_ticket_number 7 | , amt 8 | , profit 9 | from (select ss_ticket_number 10 | , ss_customer_sk 11 | , ca_city bought_city 12 | , sum(ss_coupon_amt) amt 13 | , sum(ss_net_profit) profit 14 | from store_sales, 15 | date_dim, 16 | store, 17 | household_demographics, 18 | customer_address 19 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 20 | and store_sales.ss_store_sk = store.s_store_sk 21 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 22 | and store_sales.ss_addr_sk = customer_address.ca_address_sk 23 | and (household_demographics.hd_dep_count = 4 or 24 | household_demographics.hd_vehicle_count = 3) 25 | and date_dim.d_dow in (6, 0) 26 | and date_dim.d_year in (1999, 1999 + 1, 1999 + 2) 27 | and store.s_city in ('Fairview', 'Midway', 'Fairview', 'Fairview', 'Fairview') 28 | group by ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, 29 | customer, 30 | customer_address current_addr 31 | where ss_customer_sk = c_customer_sk 32 | and customer.c_current_addr_sk = current_addr.ca_address_sk 33 | and current_addr.ca_city <> bought_city 34 | order by c_last_name 35 | , c_first_name 36 | , ca_city 37 | , bought_city 38 | , ss_ticket_number 39 | limit 100; 40 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query47.sql: -------------------------------------------------------------------------------- 1 | with v1 as ( 2 | select i_category, 3 | i_brand, 4 | s_store_name, 5 | s_company_name, 6 | d_year, 7 | d_moy, 8 | sum(ss_sales_price) sum_sales, 9 | avg(sum(ss_sales_price)) over 10 | (partition by i_category, i_brand, 11 | s_store_name, s_company_name, d_year) 12 | avg_monthly_sales, 13 | rank() over 14 | (partition by i_category, i_brand, 15 | s_store_name, s_company_name 16 | order by d_year, d_moy) rn 17 | from item, 18 | store_sales, 19 | date_dim, 20 | store 21 | where ss_item_sk = i_item_sk 22 | and ss_sold_date_sk = d_date_sk 23 | and ss_store_sk = s_store_sk 24 | and ( 25 | d_year = 1999 or 26 | (d_year = 1999 - 1 and d_moy = 12) or 27 | (d_year = 1999 + 1 and d_moy = 1) 28 | ) 29 | group by i_category, i_brand, 30 | s_store_name, s_company_name, 31 | d_year, d_moy), 32 | v2 as ( 33 | select v1.i_category 34 | , v1.i_brand 35 | , v1.s_store_name 36 | , v1.s_company_name 37 | , v1.d_year 38 | , v1.d_moy 39 | , v1.avg_monthly_sales 40 | , v1.sum_sales 41 | , v1_lag.sum_sales psum 42 | , v1_lead.sum_sales nsum 43 | from v1, 44 | v1 v1_lag, 45 | v1 v1_lead 46 | where v1.i_category = v1_lag.i_category 47 | and v1.i_category = v1_lead.i_category 48 | and v1.i_brand = v1_lag.i_brand 49 | and v1.i_brand = v1_lead.i_brand 50 | and v1.s_store_name = v1_lag.s_store_name 51 | and v1.s_store_name = v1_lead.s_store_name 52 | and v1.s_company_name = v1_lag.s_company_name 53 | and v1.s_company_name = v1_lead.s_company_name 54 | and v1.rn = v1_lag.rn + 1 55 | and v1.rn = v1_lead.rn - 1) 56 | insert into query47 57 | select * 58 | from v2 59 | where d_year = 1999 60 | and avg_monthly_sales > 0 61 | and case when avg_monthly_sales > 0 
then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 62 | order by sum_sales - avg_monthly_sales, 3 63 | limit 100; 64 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query48.sql: -------------------------------------------------------------------------------- 1 | insert into query48 2 | select sum(ss_quantity) 3 | from store_sales, 4 | store, 5 | customer_demographics, 6 | customer_address, 7 | date_dim 8 | where s_store_sk = ss_store_sk 9 | and ss_sold_date_sk = d_date_sk 10 | and d_year = 2000 11 | and ( 12 | ( 13 | cd_demo_sk = ss_cdemo_sk 14 | and 15 | cd_marital_status = 'M' 16 | and 17 | cd_education_status = '4 yr Degree' 18 | and 19 | ss_sales_price between 100.00 and 150.00 20 | ) 21 | or 22 | ( 23 | cd_demo_sk = ss_cdemo_sk 24 | and 25 | cd_marital_status = 'D' 26 | and 27 | cd_education_status = '2 yr Degree' 28 | and 29 | ss_sales_price between 50.00 and 100.00 30 | ) 31 | or 32 | ( 33 | cd_demo_sk = ss_cdemo_sk 34 | and 35 | cd_marital_status = 'S' 36 | and 37 | cd_education_status = 'College' 38 | and 39 | ss_sales_price between 150.00 and 200.00 40 | ) 41 | ) 42 | and ( 43 | ( 44 | ss_addr_sk = ca_address_sk 45 | and 46 | ca_country = 'United States' 47 | and 48 | ca_state in ('CO', 'OH', 'TX') 49 | and ss_net_profit between 0 and 2000 50 | ) 51 | or 52 | (ss_addr_sk = ca_address_sk 53 | and 54 | ca_country = 'United States' 55 | and 56 | ca_state in ('OR', 'MN', 'KY') 57 | and ss_net_profit between 150 and 3000 58 | ) 59 | or 60 | (ss_addr_sk = ca_address_sk 61 | and 62 | ca_country = 'United States' 63 | and 64 | ca_state in ('VA', 'CA', 'MS') 65 | and ss_net_profit between 50 and 25000 66 | ) 67 | ) 68 | ; 69 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query50.sql: -------------------------------------------------------------------------------- 1 | insert into query50 2 | select s_store_name 3 | , s_company_id 4 | , s_street_number 5 | , s_street_name 6 | , s_street_type 7 | , s_suite_number 8 | , s_city 9 | , s_county 10 | , s_state 11 | , s_zip 12 | , sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30) then 1 else 0 end) as 30_days 13 | , sum(case 14 | when (sr_returned_date_sk - ss_sold_date_sk > 30) and 15 | (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 16 | else 0 end) as 31_60_days 17 | , sum(case 18 | when (sr_returned_date_sk - ss_sold_date_sk > 60) and 19 | (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 20 | else 0 end) as 61_90_days 21 | , sum(case 22 | when (sr_returned_date_sk - ss_sold_date_sk > 90) and 23 | (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 24 | else 0 end) as 91_120_days 25 | , sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as above120_days 26 | from store_sales 27 | , store_returns 28 | , store 29 | , date_dim d1 30 | , date_dim d2 31 | where d2.d_year = 2001 32 | and d2.d_moy = 8 33 | and ss_ticket_number = sr_ticket_number 34 | and ss_item_sk = sr_item_sk 35 | and ss_sold_date_sk = d1.d_date_sk 36 | and sr_returned_date_sk = d2.d_date_sk 37 | and ss_customer_sk = sr_customer_sk 38 | and ss_store_sk = s_store_sk 39 | group by s_store_name 40 | , s_company_id 41 | , s_street_number 42 | , s_street_name 43 | , s_street_type 44 | , s_suite_number 45 | , s_city 46 | , s_county 47 | , s_state 48 | , s_zip 49 | order by s_store_name 50 | , s_company_id 51 | , s_street_number 52 | , s_street_name 53 | , s_street_type 54 | , s_suite_number 55 | , 
s_city 56 | , s_county 57 | , s_state 58 | , s_zip 59 | limit 100; 60 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query51.sql: -------------------------------------------------------------------------------- 1 | WITH web_v1 as ( 2 | select ws_item_sk item_sk, 3 | d_date, 4 | sum(sum(ws_sales_price)) 5 | over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales 6 | from web_sales 7 | , date_dim 8 | where ws_sold_date_sk = d_date_sk 9 | and d_month_seq between 1200 and 1200 + 11 10 | and ws_item_sk is not NULL 11 | group by ws_item_sk, d_date), 12 | store_v1 as ( 13 | select ss_item_sk item_sk, 14 | d_date, 15 | sum(sum(ss_sales_price)) 16 | over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales 17 | from store_sales 18 | , date_dim 19 | where ss_sold_date_sk = d_date_sk 20 | and d_month_seq between 1200 and 1200 + 11 21 | and ss_item_sk is not NULL 22 | group by ss_item_sk, d_date) 23 | insert into query51 24 | select * 25 | from (select item_sk 26 | , d_date 27 | , web_sales 28 | , store_sales 29 | , max(web_sales) 30 | over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative 31 | , max(store_sales) 32 | over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative 33 | from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk 34 | , case when web.d_date is not null then web.d_date else store.d_date end d_date 35 | , web.cume_sales web_sales 36 | , store.cume_sales store_sales 37 | from web_v1 web 38 | full outer join store_v1 store on (web.item_sk = store.item_sk 39 | and web.d_date = store.d_date) 40 | ) x) y 41 | where web_cumulative > store_cumulative 42 | order by item_sk 43 | , d_date 44 | limit 100; 45 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query52.sql: -------------------------------------------------------------------------------- 1 | insert into query52 2 | select dt.d_year 3 | , item.i_brand_id brand_id 4 | , item.i_brand brand 5 | , sum(ss_ext_sales_price) ext_price 6 | from date_dim dt 7 | , store_sales 8 | , item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy = 11 13 | and dt.d_year = 2000 14 | group by dt.d_year 15 | , item.i_brand 16 | , item.i_brand_id 17 | order by dt.d_year 18 | , ext_price desc 19 | , brand_id 20 | limit 100; 21 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query53.sql: -------------------------------------------------------------------------------- 1 | insert into query53 2 | select * 3 | from (select i_manufact_id, 4 | sum(ss_sales_price) sum_sales, 5 | avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales 6 | from item, 7 | store_sales, 8 | date_dim, 9 | store 10 | where ss_item_sk = i_item_sk 11 | and ss_sold_date_sk = d_date_sk 12 | and ss_store_sk = s_store_sk 13 | and d_month_seq in 14 | (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 15 | 1200 + 11) 16 | and ((i_category in ('Books', 'Children', 'Electronics') and 17 | i_class in ('personal', 'portable', 'reference', 'self-help') and 18 | i_brand in ('scholaramalgamalg #14', 
'scholaramalgamalg #7', 19 | 'exportiunivamalg #9', 'scholaramalgamalg #9')) 20 | or (i_category in ('Women', 'Music', 'Men') and 21 | i_class in ('accessories', 'classical', 'fragrances', 'pants') and 22 | i_brand in ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 23 | 'importoamalg #1'))) 24 | group by i_manufact_id, d_qoy) tmp1 25 | where case 26 | when avg_quarterly_sales > 0 27 | then abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales 28 | else null end > 0.1 29 | order by avg_quarterly_sales, 30 | sum_sales, 31 | i_manufact_id 32 | limit 100; 33 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query54.sql: -------------------------------------------------------------------------------- 1 | with my_customers as ( 2 | select distinct c_customer_sk 3 | , c_current_addr_sk 4 | from (select cs_sold_date_sk sold_date_sk, 5 | cs_bill_customer_sk customer_sk, 6 | cs_item_sk item_sk 7 | from catalog_sales 8 | union all 9 | select ws_sold_date_sk sold_date_sk, 10 | ws_bill_customer_sk customer_sk, 11 | ws_item_sk item_sk 12 | from web_sales 13 | ) cs_or_ws_sales, 14 | item, 15 | date_dim, 16 | customer 17 | where sold_date_sk = d_date_sk 18 | and item_sk = i_item_sk 19 | and i_category = 'Women' 20 | and i_class = 'maternity' 21 | and c_customer_sk = cs_or_ws_sales.customer_sk 22 | and d_moy = 12 23 | and d_year = 1998 24 | ) 25 | , my_revenue as ( 26 | select c_customer_sk, 27 | sum(ss_ext_sales_price) as revenue 28 | from my_customers, 29 | store_sales, 30 | customer_address, 31 | store, 32 | date_dim 33 | where c_current_addr_sk = ca_address_sk 34 | and ca_county = s_county 35 | and ca_state = s_state 36 | and ss_sold_date_sk = d_date_sk 37 | and c_customer_sk = ss_customer_sk 38 | and d_month_seq between (select distinct d_month_seq + 1 39 | from date_dim 40 | where d_year = 1998 41 | and d_moy = 12) 42 | and (select distinct d_month_seq + 3 43 | from date_dim 44 | where d_year = 1998 45 | and d_moy = 12) 46 | group by c_customer_sk 47 | ) 48 | , segments as 49 | (select cast((revenue / 50) as int) as segment 50 | from my_revenue 51 | ) 52 | insert into query54 53 | select segment, count(*) as num_customers, segment * 50 as segment_base 54 | from segments 55 | group by segment 56 | order by segment, num_customers 57 | limit 100; 58 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query55.sql: -------------------------------------------------------------------------------- 1 | insert into query55 2 | select i_brand_id brand_id, 3 | i_brand brand, 4 | sum(ss_ext_sales_price) ext_price 5 | from date_dim, 6 | store_sales, 7 | item 8 | where d_date_sk = ss_sold_date_sk 9 | and ss_item_sk = i_item_sk 10 | and i_manager_id = 28 11 | and d_moy = 11 12 | and d_year = 1999 13 | group by i_brand, i_brand_id 14 | order by ext_price desc, i_brand_id 15 | limit 100; 16 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query56.sql: -------------------------------------------------------------------------------- 1 | with ss as ( 2 | select i_item_id, sum(ss_ext_sales_price) total_sales 3 | from store_sales, 4 | date_dim, 5 | customer_address, 6 | item 7 | where i_item_id in (select i_item_id 8 | from item 9 | where i_color in ('slate', 'blanched', 'burnished')) 10 | and ss_item_sk = i_item_sk 11 | and ss_sold_date_sk = d_date_sk 12 | and d_year = 2001 13 | and d_moy = 2 14 | and ss_addr_sk = ca_address_sk 15 | and 
ca_gmt_offset = -5 16 | group by i_item_id), 17 | cs as ( 18 | select i_item_id, sum(cs_ext_sales_price) total_sales 19 | from catalog_sales, 20 | date_dim, 21 | customer_address, 22 | item 23 | where i_item_id in (select i_item_id 24 | from item 25 | where i_color in ('slate', 'blanched', 'burnished')) 26 | and cs_item_sk = i_item_sk 27 | and cs_sold_date_sk = d_date_sk 28 | and d_year = 2001 29 | and d_moy = 2 30 | and cs_bill_addr_sk = ca_address_sk 31 | and ca_gmt_offset = -5 32 | group by i_item_id), 33 | ws as ( 34 | select i_item_id, sum(ws_ext_sales_price) total_sales 35 | from web_sales, 36 | date_dim, 37 | customer_address, 38 | item 39 | where i_item_id in (select i_item_id 40 | from item 41 | where i_color in ('slate', 'blanched', 'burnished')) 42 | and ws_item_sk = i_item_sk 43 | and ws_sold_date_sk = d_date_sk 44 | and d_year = 2001 45 | and d_moy = 2 46 | and ws_bill_addr_sk = ca_address_sk 47 | and ca_gmt_offset = -5 48 | group by i_item_id) 49 | insert into query56 50 | select i_item_id, sum(total_sales) total_sales 51 | from (select * 52 | from ss 53 | union all 54 | select * 55 | from cs 56 | union all 57 | select * 58 | from ws) tmp1 59 | group by i_item_id 60 | order by total_sales 61 | limit 100; 62 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query57.sql: -------------------------------------------------------------------------------- 1 | with v1 as ( 2 | select i_category, 3 | i_brand, 4 | cc_name, 5 | d_year, 6 | d_moy, 7 | sum(cs_sales_price) sum_sales, 8 | avg(sum(cs_sales_price)) over 9 | (partition by i_category, i_brand, 10 | cc_name, d_year) 11 | avg_monthly_sales, 12 | rank() over 13 | (partition by i_category, i_brand, 14 | cc_name 15 | order by d_year, d_moy) rn 16 | from item, 17 | catalog_sales, 18 | date_dim, 19 | call_center 20 | where cs_item_sk = i_item_sk 21 | and cs_sold_date_sk = d_date_sk 22 | and cc_call_center_sk = cs_call_center_sk 23 | and ( 24 | d_year = 1999 or 25 | (d_year = 1999 - 1 and d_moy = 12) or 26 | (d_year = 1999 + 1 and d_moy = 1) 27 | ) 28 | group by i_category, i_brand, 29 | cc_name, d_year, d_moy), 30 | v2 as ( 31 | select v1.i_category 32 | , v1.i_brand 33 | , v1.cc_name 34 | , v1.d_year 35 | , v1.d_moy 36 | , v1.avg_monthly_sales 37 | , v1.sum_sales 38 | , v1_lag.sum_sales psum 39 | , v1_lead.sum_sales nsum 40 | from v1, 41 | v1 v1_lag, 42 | v1 v1_lead 43 | where v1.i_category = v1_lag.i_category 44 | and v1.i_category = v1_lead.i_category 45 | and v1.i_brand = v1_lag.i_brand 46 | and v1.i_brand = v1_lead.i_brand 47 | and v1.cc_name = v1_lag.cc_name 48 | and v1.cc_name = v1_lead.cc_name 49 | and v1.rn = v1_lag.rn + 1 50 | and v1.rn = v1_lead.rn - 1) 51 | insert into query57 52 | select * 53 | from v2 54 | where d_year = 1999 55 | and avg_monthly_sales > 0 56 | and case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 57 | order by sum_sales - avg_monthly_sales, 3 58 | limit 100; 59 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query60.sql: -------------------------------------------------------------------------------- 1 | with ss as ( 2 | select i_item_id, 3 | sum(ss_ext_sales_price) total_sales 4 | from store_sales, 5 | date_dim, 6 | customer_address, 7 | item 8 | where i_item_id in (select i_item_id 9 | from item 10 | where i_category in ('Music')) 11 | and ss_item_sk = i_item_sk 12 | and ss_sold_date_sk = d_date_sk 13 | and d_year = 1998 14 | 
and d_moy = 9 15 | and ss_addr_sk = ca_address_sk 16 | and ca_gmt_offset = -5 17 | group by i_item_id), 18 | cs as ( 19 | select i_item_id, 20 | sum(cs_ext_sales_price) total_sales 21 | from catalog_sales, 22 | date_dim, 23 | customer_address, 24 | item 25 | where i_item_id in (select i_item_id 26 | from item 27 | where i_category in ('Music')) 28 | and cs_item_sk = i_item_sk 29 | and cs_sold_date_sk = d_date_sk 30 | and d_year = 1998 31 | and d_moy = 9 32 | and cs_bill_addr_sk = ca_address_sk 33 | and ca_gmt_offset = -5 34 | group by i_item_id), 35 | ws as ( 36 | select i_item_id, 37 | sum(ws_ext_sales_price) total_sales 38 | from web_sales, 39 | date_dim, 40 | customer_address, 41 | item 42 | where i_item_id in (select i_item_id 43 | from item 44 | where i_category in ('Music')) 45 | and ws_item_sk = i_item_sk 46 | and ws_sold_date_sk = d_date_sk 47 | and d_year = 1998 48 | and d_moy = 9 49 | and ws_bill_addr_sk = ca_address_sk 50 | and ca_gmt_offset = -5 51 | group by i_item_id) 52 | insert into query60 53 | select i_item_id 54 | , sum(total_sales) total_sales 55 | from (select * 56 | from ss 57 | union all 58 | select * 59 | from cs 60 | union all 61 | select * 62 | from ws) tmp1 63 | group by i_item_id 64 | order by i_item_id 65 | , total_sales 66 | limit 100; 67 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query61.sql: -------------------------------------------------------------------------------- 1 | insert into query61 2 | select promotions, total, cast(promotions as decimal(15, 4)) / cast(total as decimal(15, 4)) * 100 3 | from (select sum(ss_ext_sales_price) promotions 4 | from store_sales 5 | , store 6 | , promotion 7 | , date_dim 8 | , customer 9 | , customer_address 10 | , item 11 | where ss_sold_date_sk = d_date_sk 12 | and ss_store_sk = s_store_sk 13 | and ss_promo_sk = p_promo_sk 14 | and ss_customer_sk = c_customer_sk 15 | and ca_address_sk = c_current_addr_sk 16 | and ss_item_sk = i_item_sk 17 | and ca_gmt_offset = -5 18 | and i_category = 'Jewelry' 19 | and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') 20 | and s_gmt_offset = -5 21 | and d_year = 1998 22 | and d_moy = 11) promotional_sales, 23 | (select sum(ss_ext_sales_price) total 24 | from store_sales 25 | , store 26 | , date_dim 27 | , customer 28 | , customer_address 29 | , item 30 | where ss_sold_date_sk = d_date_sk 31 | and ss_store_sk = s_store_sk 32 | and ss_customer_sk = c_customer_sk 33 | and ca_address_sk = c_current_addr_sk 34 | and ss_item_sk = i_item_sk 35 | and ca_gmt_offset = -5 36 | and i_category = 'Jewelry' 37 | and s_gmt_offset = -5 38 | and d_year = 1998 39 | and d_moy = 11) all_sales 40 | order by promotions, total 41 | limit 100; 42 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query62.sql: -------------------------------------------------------------------------------- 1 | insert into query62 2 | select substr(w_warehouse_name, 1, 20) 3 | , sm_type 4 | , web_name 5 | , sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30) then 1 else 0 end) as 30_days 6 | , sum(case 7 | when (ws_ship_date_sk - ws_sold_date_sk > 30) and 8 | (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 9 | else 0 end) as 31_60_days 10 | , sum(case 11 | when (ws_ship_date_sk - ws_sold_date_sk > 60) and 12 | (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 13 | else 0 end) as 61_90_days 14 | , sum(case 15 | when (ws_ship_date_sk - ws_sold_date_sk > 90) and 16 | (ws_ship_date_sk - 
ws_sold_date_sk <= 120) then 1 17 | else 0 end) as 91_120_days 18 | , sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as above120_days 19 | from web_sales 20 | , warehouse 21 | , ship_mode 22 | , web_site 23 | , date_dim 24 | where d_month_seq between 1200 and 1200 + 11 25 | and ws_ship_date_sk = d_date_sk 26 | and ws_warehouse_sk = w_warehouse_sk 27 | and ws_ship_mode_sk = sm_ship_mode_sk 28 | and ws_web_site_sk = web_site_sk 29 | group by substr(w_warehouse_name, 1, 20) 30 | , sm_type 31 | , web_name 32 | order by substr(w_warehouse_name, 1, 20) 33 | , sm_type 34 | , web_name 35 | limit 100; 36 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query63.sql: -------------------------------------------------------------------------------- 1 | insert into query63 2 | select * 3 | from (select i_manager_id 4 | , sum(ss_sales_price) sum_sales 5 | , avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales 6 | from item 7 | , store_sales 8 | , date_dim 9 | , store 10 | where ss_item_sk = i_item_sk 11 | and ss_sold_date_sk = d_date_sk 12 | and ss_store_sk = s_store_sk 13 | and d_month_seq in 14 | (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 15 | 1200 + 11) 16 | and ((i_category in ('Books', 'Children', 'Electronics') 17 | and i_class in ('personal', 'portable', 'reference', 'self-help') 18 | and i_brand in ('scholaramalgamalg #14', 'scholaramalgamalg #7', 19 | 'exportiunivamalg #9', 'scholaramalgamalg #9')) 20 | or (i_category in ('Women', 'Music', 'Men') 21 | and i_class in ('accessories', 'classical', 'fragrances', 'pants') 22 | and i_brand in ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 23 | 'importoamalg #1'))) 24 | group by i_manager_id, d_moy) tmp1 25 | where case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 26 | order by i_manager_id 27 | , avg_monthly_sales 28 | , sum_sales 29 | limit 100; 30 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query65.sql: -------------------------------------------------------------------------------- 1 | insert into query65 2 | select s_store_name, 3 | i_item_desc, 4 | sc.revenue, 5 | i_current_price, 6 | i_wholesale_cost, 7 | i_brand 8 | from store, 9 | item, 10 | (select ss_store_sk, avg(revenue) as ave 11 | from (select ss_store_sk, 12 | ss_item_sk, 13 | sum(ss_sales_price) as revenue 14 | from store_sales, 15 | date_dim 16 | where ss_sold_date_sk = d_date_sk 17 | and d_month_seq between 1176 and 1176 + 11 18 | group by ss_store_sk, ss_item_sk) sa 19 | group by ss_store_sk) sb, 20 | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue 21 | from store_sales, 22 | date_dim 23 | where ss_sold_date_sk = d_date_sk 24 | and d_month_seq between 1176 and 1176 + 11 25 | group by ss_store_sk, ss_item_sk) sc 26 | where sb.ss_store_sk = sc.ss_store_sk 27 | and sc.revenue <= 0.1 * sb.ave 28 | and s_store_sk = sc.ss_store_sk 29 | and i_item_sk = sc.ss_item_sk 30 | order by s_store_name, i_item_desc 31 | limit 100; 32 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query67.sql: -------------------------------------------------------------------------------- 1 | insert into query67 2 | select * 3 | from (select i_category 4 | , i_class 5 | , i_brand 6 | , i_product_name 7 | , d_year 8 | , d_qoy 9 | , 
d_moy 10 | , s_store_id 11 | , sumsales 12 | , rank() over (partition by i_category order by sumsales desc) rk 13 | from (select i_category 14 | , i_class 15 | , i_brand 16 | , i_product_name 17 | , d_year 18 | , d_qoy 19 | , d_moy 20 | , s_store_id 21 | , sum(coalesce(ss_sales_price * ss_quantity, 0)) sumsales 22 | from store_sales 23 | , date_dim 24 | , store 25 | , item 26 | where ss_sold_date_sk = d_date_sk 27 | and ss_item_sk = i_item_sk 28 | and ss_store_sk = s_store_sk 29 | and d_month_seq between 1200 and 1200 + 11 30 | group by rollup (i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy, s_store_id)) dw1) dw2 31 | where rk <= 100 32 | order by i_category 33 | , i_class 34 | , i_brand 35 | , i_product_name 36 | , d_year 37 | , d_qoy 38 | , d_moy 39 | , s_store_id 40 | , sumsales 41 | , rk 42 | limit 100; 43 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query68.sql: -------------------------------------------------------------------------------- 1 | insert into query68 2 | select c_last_name 3 | , c_first_name 4 | , ca_city 5 | , bought_city 6 | , ss_ticket_number 7 | , extended_price 8 | , extended_tax 9 | , list_price 10 | from (select ss_ticket_number 11 | , ss_customer_sk 12 | , ca_city bought_city 13 | , sum(ss_ext_sales_price) extended_price 14 | , sum(ss_ext_list_price) list_price 15 | , sum(ss_ext_tax) extended_tax 16 | from store_sales 17 | , date_dim 18 | , store 19 | , household_demographics 20 | , customer_address 21 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 22 | and store_sales.ss_store_sk = store.s_store_sk 23 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 24 | and store_sales.ss_addr_sk = customer_address.ca_address_sk 25 | and date_dim.d_dom between 1 and 2 26 | and (household_demographics.hd_dep_count = 4 or 27 | household_demographics.hd_vehicle_count = 3) 28 | and date_dim.d_year in (1999, 1999 + 1, 1999 + 2) 29 | and store.s_city in ('Fairview', 'Midway') 30 | group by ss_ticket_number 31 | , ss_customer_sk 32 | , ss_addr_sk, ca_city) dn 33 | , customer 34 | , customer_address current_addr 35 | where ss_customer_sk = c_customer_sk 36 | and customer.c_current_addr_sk = current_addr.ca_address_sk 37 | and current_addr.ca_city <> bought_city 38 | order by c_last_name 39 | , ss_ticket_number 40 | limit 100; 41 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query69.sql: -------------------------------------------------------------------------------- 1 | insert into query69 2 | select cd_gender, 3 | cd_marital_status, 4 | cd_education_status, 5 | count(*) cnt1, 6 | cd_purchase_estimate, 7 | count(*) cnt2, 8 | cd_credit_rating, 9 | count(*) cnt3 10 | from customer c, 11 | customer_address ca, 12 | customer_demographics 13 | where c.c_current_addr_sk = ca.ca_address_sk 14 | and ca_state in ('KY', 'GA', 'NM') 15 | and cd_demo_sk = c.c_current_cdemo_sk 16 | and exists (select * 17 | from store_sales, 18 | date_dim 19 | where c.c_customer_sk = ss_customer_sk 20 | and ss_sold_date_sk = d_date_sk 21 | and d_year = 2001 22 | and d_moy between 4 and 4 + 2) 23 | and (not exists (select * 24 | from web_sales, 25 | date_dim 26 | where c.c_customer_sk = ws_bill_customer_sk 27 | and ws_sold_date_sk = d_date_sk 28 | and d_year = 2001 29 | and d_moy between 4 and 4 + 2) and 30 | not exists (select * 31 | from catalog_sales, 32 | date_dim 33 | where c.c_customer_sk = cs_ship_customer_sk 34 | and cs_sold_date_sk = 
d_date_sk 35 | and d_year = 2001 36 | and d_moy between 4 and 4 + 2)) 37 | group by cd_gender, 38 | cd_marital_status, 39 | cd_education_status, 40 | cd_purchase_estimate, 41 | cd_credit_rating 42 | order by cd_gender, 43 | cd_marital_status, 44 | cd_education_status, 45 | cd_purchase_estimate, 46 | cd_credit_rating 47 | limit 100; 48 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query70.sql: -------------------------------------------------------------------------------- 1 | insert into query70 2 | select sum(ss_net_profit) as total_sum 3 | , s_state 4 | , s_county 5 | , grouping (s_state)+ grouping (s_county) as lochierarchy 6 | , rank() over ( 7 | partition by grouping (s_state)+ grouping (s_county) 8 | , case when grouping (s_county) = 0 then s_state end 9 | order by sum(ss_net_profit) desc) as rank_within_parent 10 | from store_sales 11 | , date_dim d1 12 | , store 13 | where d1.d_month_seq between 1200 and 1200 + 11 14 | and d1.d_date_sk = ss_sold_date_sk 15 | and s_store_sk = ss_store_sk 16 | and s_state in 17 | (select s_state 18 | from (select s_state as s_state, 19 | rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking 20 | from store_sales, 21 | store, 22 | date_dim 23 | where d_month_seq between 1200 and 1200 + 11 24 | and d_date_sk = ss_sold_date_sk 25 | and s_store_sk = ss_store_sk 26 | group by s_state 27 | ) tmp1 28 | where ranking <= 5 29 | ) 30 | group by rollup (s_state, s_county) 31 | order by lochierarchy desc 32 | , case when lochierarchy = 0 then s_state end 33 | , rank_within_parent 34 | limit 100; 35 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query71.sql: -------------------------------------------------------------------------------- 1 | insert into query71 2 | select i_brand_id brand_id, 3 | i_brand brand, 4 | t_hour, 5 | t_minute, 6 | sum(ext_price) ext_price 7 | from item, 8 | (select ws_ext_sales_price as ext_price, 9 | ws_sold_date_sk as sold_date_sk, 10 | ws_item_sk as sold_item_sk, 11 | ws_sold_time_sk as time_sk 12 | from web_sales, 13 | date_dim 14 | where d_date_sk = ws_sold_date_sk 15 | and d_moy = 11 16 | and d_year = 1999 17 | union all 18 | select cs_ext_sales_price as ext_price, 19 | cs_sold_date_sk as sold_date_sk, 20 | cs_item_sk as sold_item_sk, 21 | cs_sold_time_sk as time_sk 22 | from catalog_sales, 23 | date_dim 24 | where d_date_sk = cs_sold_date_sk 25 | and d_moy = 11 26 | and d_year = 1999 27 | union all 28 | select ss_ext_sales_price as ext_price, 29 | ss_sold_date_sk as sold_date_sk, 30 | ss_item_sk as sold_item_sk, 31 | ss_sold_time_sk as time_sk 32 | from store_sales, 33 | date_dim 34 | where d_date_sk = ss_sold_date_sk 35 | and d_moy = 11 36 | and d_year = 1999 37 | ) tmp, 38 | time_dim 39 | where sold_item_sk = i_item_sk 40 | and i_manager_id = 1 41 | and time_sk = t_time_sk 42 | and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') 43 | group by i_brand, i_brand_id, t_hour, t_minute 44 | order by ext_price desc, i_brand_id 45 | ; 46 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query72.sql: -------------------------------------------------------------------------------- 1 | insert into query72 2 | select i_item_desc 3 | , w_warehouse_name 4 | , d1.d_week_seq 5 | , sum(case when p_promo_sk is null then 1 else 0 end) no_promo 6 | , sum(case when p_promo_sk is not null then 1 else 0 end) promo 7 | , count(*) total_cnt 8 | from 
catalog_sales 9 | join inventory on (cs_item_sk = inv_item_sk) 10 | join warehouse on (w_warehouse_sk = inv_warehouse_sk) 11 | join item on (i_item_sk = cs_item_sk) 12 | join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) 13 | join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) 14 | join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) 15 | join date_dim d2 on (inv_date_sk = d2.d_date_sk) 16 | join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) 17 | left outer join promotion on (cs_promo_sk = p_promo_sk) 18 | left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) 19 | where d1.d_week_seq = d2.d_week_seq 20 | and inv_quantity_on_hand < cs_quantity 21 | and d3.d_date > date_add(cast(d1.d_date as date), 5) 22 | and hd_buy_potential = '>10000' 23 | and d1.d_year = 1999 24 | and cd_marital_status = 'D' 25 | group by i_item_desc, w_warehouse_name, d1.d_week_seq 26 | order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq 27 | limit 100; 28 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query73.sql: -------------------------------------------------------------------------------- 1 | insert into query73 2 | select c_last_name 3 | , c_first_name 4 | , c_salutation 5 | , c_preferred_cust_flag 6 | , ss_ticket_number 7 | , cnt 8 | from (select ss_ticket_number 9 | , ss_customer_sk 10 | , count(*) cnt 11 | from store_sales, 12 | date_dim, 13 | store, 14 | household_demographics 15 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 16 | and store_sales.ss_store_sk = store.s_store_sk 17 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 18 | and date_dim.d_dom between 1 and 2 19 | and (household_demographics.hd_buy_potential = '>10000' or 20 | household_demographics.hd_buy_potential = 'Unknown') 21 | and household_demographics.hd_vehicle_count > 0 22 | and case 23 | when household_demographics.hd_vehicle_count > 0 then 24 | household_demographics.hd_dep_count / household_demographics.hd_vehicle_count 25 | else null end > 1 26 | and date_dim.d_year in (1999, 1999 + 1, 1999 + 2) 27 | and store.s_county in ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange County') 28 | group by ss_ticket_number, ss_customer_sk) dj, 29 | customer 30 | where ss_customer_sk = c_customer_sk 31 | and cnt between 1 and 5 32 | order by cnt desc, c_last_name asc; 33 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query74.sql: -------------------------------------------------------------------------------- 1 | with year_total as ( 2 | select c_customer_id customer_id 3 | , c_first_name customer_first_name 4 | , c_last_name customer_last_name 5 | , d_year as year 6 | , sum(ss_net_paid) year_total 7 | , 's' sale_type 8 | from customer 9 | , store_sales 10 | , date_dim 11 | where c_customer_sk = ss_customer_sk 12 | and ss_sold_date_sk = d_date_sk 13 | and d_year in (2001, 2001 + 1) 14 | group by c_customer_id 15 | , c_first_name 16 | , c_last_name 17 | , d_year 18 | union all 19 | select c_customer_id customer_id 20 | , c_first_name customer_first_name 21 | , c_last_name customer_last_name 22 | , d_year as year 23 | , sum(ws_net_paid) year_total 24 | , 'w' sale_type 25 | from customer 26 | , web_sales 27 | , date_dim 28 | where c_customer_sk = ws_bill_customer_sk 29 | and ws_sold_date_sk = d_date_sk 30 | and d_year in (2001, 2001 + 1) 31 | group by c_customer_id 32 | , c_first_name 33 | , c_last_name 34 | , 
d_year 35 | ) 36 | insert into query74 37 | select t_s_secyear.customer_id, 38 | t_s_secyear.customer_first_name, 39 | t_s_secyear.customer_last_name 40 | from year_total t_s_firstyear 41 | , year_total t_s_secyear 42 | , year_total t_w_firstyear 43 | , year_total t_w_secyear 44 | where t_s_secyear.customer_id = t_s_firstyear.customer_id 45 | and t_s_firstyear.customer_id = t_w_secyear.customer_id 46 | and t_s_firstyear.customer_id = t_w_firstyear.customer_id 47 | and t_s_firstyear.sale_type = 's' 48 | and t_w_firstyear.sale_type = 'w' 49 | and t_s_secyear.sale_type = 's' 50 | and t_w_secyear.sale_type = 'w' 51 | and t_s_firstyear.year = 2001 52 | and t_s_secyear.year = 2001 + 1 53 | and t_w_firstyear.year = 2001 54 | and t_w_secyear.year = 2001 + 1 55 | and t_s_firstyear.year_total > 0 56 | and t_w_firstyear.year_total > 0 57 | and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end 58 | > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end 59 | order by 1, 1, 1 60 | limit 100; 61 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query76.sql: -------------------------------------------------------------------------------- 1 | insert into query76 2 | select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt 3 | FROM ( 4 | SELECT 'store' as channel, 5 | 'ss_store_sk' col_name, 6 | d_year, 7 | d_qoy, 8 | i_category, 9 | ss_ext_sales_price ext_sales_price 10 | FROM store_sales, 11 | item, 12 | date_dim 13 | WHERE ss_store_sk IS NULL 14 | AND ss_sold_date_sk = d_date_sk 15 | AND ss_item_sk = i_item_sk 16 | UNION ALL 17 | SELECT 'web' as channel, 18 | 'ws_ship_customer_sk' col_name, 19 | d_year, 20 | d_qoy, 21 | i_category, 22 | ws_ext_sales_price ext_sales_price 23 | FROM web_sales, 24 | item, 25 | date_dim 26 | WHERE ws_ship_customer_sk IS NULL 27 | AND ws_sold_date_sk = d_date_sk 28 | AND ws_item_sk = i_item_sk 29 | UNION ALL 30 | SELECT 'catalog' as channel, 31 | 'cs_ship_addr_sk' col_name, 32 | d_year, 33 | d_qoy, 34 | i_category, 35 | cs_ext_sales_price ext_sales_price 36 | FROM catalog_sales, 37 | item, 38 | date_dim 39 | WHERE cs_ship_addr_sk IS NULL 40 | AND cs_sold_date_sk = d_date_sk 41 | AND cs_item_sk = i_item_sk) foo 42 | GROUP BY channel, col_name, d_year, d_qoy, i_category 43 | ORDER BY channel, col_name, d_year, d_qoy, i_category 44 | limit 100; 45 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query79.sql: -------------------------------------------------------------------------------- 1 | insert into query79 2 | select 3 | c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit 4 | from 5 | (select ss_ticket_number 6 | ,ss_customer_sk 7 | ,store.s_city 8 | ,sum(ss_coupon_amt) amt 9 | ,sum(ss_net_profit) profit 10 | from store_sales,date_dim,store,household_demographics 11 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 12 | and store_sales.ss_store_sk = store.s_store_sk 13 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 14 | and (household_demographics.hd_dep_count = 6 or household_demographics.hd_vehicle_count > 2) 15 | and date_dim.d_dow = 1 16 | and date_dim.d_year in (1999,1999+1,1999+2) 17 | and store.s_number_employees between 200 and 295 18 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer 19 | where ss_customer_sk = 
c_customer_sk 20 | order by c_last_name,c_first_name,substr(s_city,1,30), profit 21 | limit 100; 22 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query81.sql: -------------------------------------------------------------------------------- 1 | with customer_total_return as 2 | (select cr_returning_customer_sk as ctr_customer_sk 3 | , 4 | ca_state as ctr_state, 5 | sum(cr_return_amt_inc_tax) as ctr_total_return 6 | from catalog_returns 7 | , date_dim 8 | , customer_address 9 | where cr_returned_date_sk = d_date_sk 10 | and d_year = 2000 11 | and cr_returning_addr_sk = ca_address_sk 12 | group by cr_returning_customer_sk 13 | , ca_state) 14 | insert into query81 15 | select c_customer_id 16 | , c_salutation 17 | , c_first_name 18 | , c_last_name 19 | , ca_street_number 20 | , ca_street_name 21 | , ca_street_type 22 | , ca_suite_number 23 | , ca_city 24 | , ca_county 25 | , ca_state 26 | , ca_zip 27 | , ca_country 28 | , ca_gmt_offset 29 | , ca_location_type 30 | , ctr_total_return 31 | from customer_total_return ctr1 32 | , customer_address 33 | , customer 34 | where ctr1.ctr_total_return > (select avg(ctr_total_return) * 1.2 35 | from customer_total_return ctr2 36 | where ctr1.ctr_state = ctr2.ctr_state) 37 | and ca_address_sk = c_current_addr_sk 38 | and ca_state = 'GA' 39 | and ctr1.ctr_customer_sk = c_customer_sk 40 | order by c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name 41 | , ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset 42 | , ca_location_type, ctr_total_return 43 | limit 100; 44 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query82.sql: -------------------------------------------------------------------------------- 1 | insert into query82 2 | select i_item_id 3 | , i_item_desc 4 | , i_current_price 5 | from item, 6 | inventory, 7 | date_dim, 8 | store_sales 9 | where i_current_price between 62 and 62 + 30 10 | and inv_item_sk = i_item_sk 11 | and d_date_sk = inv_date_sk 12 | and d_date between cast('2000-05-25' as date) and date_add(cast('2000-05-25' as date), 60) 13 | and i_manufact_id in (129, 270, 821, 423) 14 | and inv_quantity_on_hand between 100 and 500 15 | and ss_item_sk = i_item_sk 16 | group by i_item_id, i_item_desc, i_current_price 17 | order by i_item_id 18 | limit 100; 19 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query83.sql: -------------------------------------------------------------------------------- 1 | with sr_items as 2 | (select i_item_id item_id, 3 | sum(sr_return_quantity) sr_item_qty 4 | from store_returns, 5 | item, 6 | date_dim 7 | where sr_item_sk = i_item_sk 8 | and d_date in 9 | (select d_date 10 | from date_dim 11 | where d_week_seq in 12 | (select d_week_seq 13 | from date_dim 14 | where d_date in ('2000-06-30', '2000-09-27', '2000-11-17'))) 15 | and sr_returned_date_sk = d_date_sk 16 | group by i_item_id), 17 | cr_items as 18 | (select i_item_id item_id, 19 | sum(cr_return_quantity) cr_item_qty 20 | from catalog_returns, 21 | item, 22 | date_dim 23 | where cr_item_sk = i_item_sk 24 | and d_date in 25 | (select d_date 26 | from date_dim 27 | where d_week_seq in 28 | (select d_week_seq 29 | from date_dim 30 | where d_date in ('2000-06-30', '2000-09-27', '2000-11-17'))) 31 | and cr_returned_date_sk = d_date_sk 32 | group by i_item_id), 33 | wr_items as 34 | (select i_item_id 
item_id, 35 | sum(wr_return_quantity) wr_item_qty 36 | from web_returns, 37 | item, 38 | date_dim 39 | where wr_item_sk = i_item_sk 40 | and d_date in 41 | (select d_date 42 | from date_dim 43 | where d_week_seq in 44 | (select d_week_seq 45 | from date_dim 46 | where d_date in ('2000-06-30', '2000-09-27', '2000-11-17'))) 47 | and wr_returned_date_sk = d_date_sk 48 | group by i_item_id) 49 | insert into query83 50 | select sr_items.item_id 51 | , sr_item_qty 52 | , sr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 sr_dev 53 | , cr_item_qty 54 | , cr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 cr_dev 55 | , wr_item_qty 56 | , wr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 wr_dev 57 | , (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 average 58 | from sr_items 59 | , cr_items 60 | , wr_items 61 | where sr_items.item_id = cr_items.item_id 62 | and sr_items.item_id = wr_items.item_id 63 | order by sr_items.item_id 64 | , sr_item_qty 65 | limit 100; 66 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query84.sql: -------------------------------------------------------------------------------- 1 | insert into query84 2 | select c_customer_id as customer_id 3 | , concat(c_last_name, ', ', coalesce(c_first_name, '')) as customername 4 | from customer 5 | , customer_address 6 | , customer_demographics 7 | , household_demographics 8 | , income_band 9 | , store_returns 10 | where ca_city = 'Edgewood' 11 | and c_current_addr_sk = ca_address_sk 12 | and ib_lower_bound >= 38128 13 | and ib_upper_bound <= 38128 + 50000 14 | and ib_income_band_sk = hd_income_band_sk 15 | and cd_demo_sk = c_current_cdemo_sk 16 | and hd_demo_sk = c_current_hdemo_sk 17 | and sr_cdemo_sk = cd_demo_sk 18 | order by c_customer_id 19 | limit 100; 20 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query86.sql: -------------------------------------------------------------------------------- 1 | insert into query86 2 | select sum(ws_net_paid) as total_sum 3 | , i_category 4 | , i_class 5 | , grouping (i_category)+ grouping (i_class) as lochierarchy 6 | , rank() over ( 7 | partition by grouping (i_category)+ grouping (i_class) 8 | , case when grouping (i_class) = 0 then i_category end 9 | order by sum(ws_net_paid) desc) as rank_within_parent 10 | from web_sales 11 | , date_dim d1 12 | , item 13 | where d1.d_month_seq between 1200 and 1200 + 11 14 | and d1.d_date_sk = ws_sold_date_sk 15 | and i_item_sk = ws_item_sk 16 | group by rollup (i_category, i_class) 17 | order by lochierarchy desc, 18 | case when lochierarchy = 0 then i_category end, 19 | rank_within_parent 20 | limit 100; 21 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query87.sql: -------------------------------------------------------------------------------- 1 | insert into query87 2 | select count(*) 3 | from ((select distinct c_last_name, c_first_name, d_date 4 | from store_sales, 5 | date_dim, 6 | customer 7 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 8 | and store_sales.ss_customer_sk = customer.c_customer_sk 9 | and d_month_seq between 1200 and 1200 + 11) 10 | except 11 | (select distinct c_last_name, c_first_name, d_date 12 | from catalog_sales, 13 | date_dim, 14 | customer 15 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 16 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 17 | and d_month_seq 
between 1200 and 1200 + 11) 18 | except 19 | (select distinct c_last_name, c_first_name, d_date 20 | from web_sales, 21 | date_dim, 22 | customer 23 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 24 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 25 | and d_month_seq between 1200 and 1200 + 11) 26 | ) cool_cust 27 | ; 28 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query89.sql: -------------------------------------------------------------------------------- 1 | insert into query89 2 | select * 3 | from ( 4 | select i_category, 5 | i_class, 6 | i_brand, 7 | s_store_name, 8 | s_company_name, 9 | d_moy, 10 | sum(ss_sales_price) sum_sales, 11 | avg(sum(ss_sales_price)) over 12 | (partition by i_category, i_brand, s_store_name, s_company_name) 13 | avg_monthly_sales 14 | from item, 15 | store_sales, 16 | date_dim, 17 | store 18 | where ss_item_sk = i_item_sk 19 | and ss_sold_date_sk = d_date_sk 20 | and ss_store_sk = s_store_sk 21 | and d_year in (1999) 22 | and ((i_category in ('Books', 'Electronics', 'Sports') and 23 | i_class in ('computers', 'stereo', 'football') 24 | ) 25 | or (i_category in ('Men', 'Jewelry', 'Women') and 26 | i_class in ('shirts', 'birdal', 'dresses') 27 | )) 28 | group by i_category, i_class, i_brand, 29 | s_store_name, s_company_name, d_moy) tmp1 30 | where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 31 | 0.1 32 | order by sum_sales - avg_monthly_sales, s_store_name 33 | limit 100; 34 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query90.sql: -------------------------------------------------------------------------------- 1 | insert into query90 2 | select cast(amc as decimal(15, 4)) / cast(pmc as decimal(15, 4)) am_pm_ratio 3 | from (select count(*) amc 4 | from web_sales, 5 | household_demographics, 6 | time_dim, 7 | web_page 8 | where ws_sold_time_sk = time_dim.t_time_sk 9 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 10 | and ws_web_page_sk = web_page.wp_web_page_sk 11 | and time_dim.t_hour between 8 and 8 + 1 12 | and household_demographics.hd_dep_count = 6 13 | and web_page.wp_char_count between 5000 and 5200) at1, 14 | (select count(*) pmc 15 | from web_sales, 16 | household_demographics, 17 | time_dim, 18 | web_page 19 | where ws_sold_time_sk = time_dim.t_time_sk 20 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 21 | and ws_web_page_sk = web_page.wp_web_page_sk 22 | and time_dim.t_hour between 19 and 19 + 1 23 | and household_demographics.hd_dep_count = 6 24 | and web_page.wp_char_count between 5000 and 5200) pt 25 | order by am_pm_ratio 26 | limit 100; 27 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query91.sql: -------------------------------------------------------------------------------- 1 | insert into query91 2 | select cc_call_center_id Call_Center, 3 | cc_name Call_Center_Name, 4 | cc_manager Manager, 5 | sum(cr_net_loss) Returns_Loss 6 | from call_center, 7 | catalog_returns, 8 | date_dim, 9 | customer, 10 | customer_address, 11 | customer_demographics, 12 | household_demographics 13 | where cr_call_center_sk = cc_call_center_sk 14 | and cr_returned_date_sk = d_date_sk 15 | and cr_returning_customer_sk = c_customer_sk 16 | and cd_demo_sk = c_current_cdemo_sk 17 | and hd_demo_sk = c_current_hdemo_sk 18 | and ca_address_sk = c_current_addr_sk 19 | and d_year = 
1998 20 | and d_moy = 11 21 | and ((cd_marital_status = 'M' and cd_education_status = 'Unknown') 22 | or (cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) 23 | and hd_buy_potential like 'Unknown%' 24 | and ca_gmt_offset = -7 25 | group by cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status 26 | order by sum(cr_net_loss) desc; 27 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query92.sql: -------------------------------------------------------------------------------- 1 | insert into query92 2 | select sum(ws_ext_discount_amt) as Excess_Discount_Amount 3 | from web_sales 4 | , item 5 | , date_dim 6 | where i_manufact_id = 350 7 | and i_item_sk = ws_item_sk 8 | and d_date between '2000-01-27' and 9 | date_add(cast('2000-01-27' as date), 90) 10 | and d_date_sk = ws_sold_date_sk 11 | and ws_ext_discount_amt 12 | > ( 13 | SELECT 1.3 * avg(ws_ext_discount_amt) 14 | FROM web_sales 15 | , date_dim 16 | WHERE ws_item_sk = i_item_sk 17 | and d_date between '2000-01-27' and 18 | date_add(cast('2000-01-27' as date), 90) 19 | and d_date_sk = ws_sold_date_sk 20 | ) 21 | order by sum(ws_ext_discount_amt) 22 | limit 100; 23 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query93.sql: -------------------------------------------------------------------------------- 1 | insert into query93 2 | select ss_customer_sk 3 | , sum(act_sales) sumsales 4 | from (select ss_item_sk 5 | , ss_ticket_number 6 | , ss_customer_sk 7 | , case 8 | when sr_return_quantity is not null then (ss_quantity - sr_return_quantity) * ss_sales_price 9 | else (ss_quantity * ss_sales_price) end act_sales 10 | from store_sales 11 | left outer join store_returns on (sr_item_sk = ss_item_sk 12 | and sr_ticket_number = ss_ticket_number) 13 | , reason 14 | where sr_reason_sk = r_reason_sk 15 | and r_reason_desc = 'reason 28') t 16 | group by ss_customer_sk 17 | order by sumsales, ss_customer_sk 18 | limit 100; 19 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query94.sql: -------------------------------------------------------------------------------- 1 | insert into query94 2 | select count(distinct ws_order_number) as order_count 3 | , sum(ws_ext_ship_cost) as total_shipping_cost 4 | , sum(ws_net_profit) as total_net_profit 5 | from web_sales ws1 6 | , date_dim 7 | , customer_address 8 | , web_site 9 | where d_date between cast('1999-2-01' as date) and 10 | date_add(cast('1999-2-01' as date), 60) 11 | and ws1.ws_ship_date_sk = d_date_sk 12 | and ws1.ws_ship_addr_sk = ca_address_sk 13 | and ca_state = 'IL' 14 | and ws1.ws_web_site_sk = web_site_sk 15 | and web_company_name = 'pri' 16 | and exists (select * 17 | from web_sales ws2 18 | where ws1.ws_order_number = ws2.ws_order_number 19 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 20 | and not exists (select * 21 | from web_returns wr1 22 | where ws1.ws_order_number = wr1.wr_order_number) 23 | order by count(distinct ws_order_number) 24 | limit 100; 25 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query95.sql: -------------------------------------------------------------------------------- 1 | with ws_wh as 2 | (select ws1.ws_order_number, ws1.ws_warehouse_sk wh1, ws2.ws_warehouse_sk wh2 3 | from web_sales ws1, 4 | web_sales ws2 5 | where ws1.ws_order_number = ws2.ws_order_number 6 | and ws1.ws_warehouse_sk <> 
ws2.ws_warehouse_sk) 7 | insert into query95 8 | select count(distinct ws_order_number) as order_count 9 | , sum(ws_ext_ship_cost) as total_shipping_cost 10 | , sum(ws_net_profit) as total_net_profit 11 | from web_sales ws1 12 | , date_dim 13 | , customer_address 14 | , web_site 15 | where d_date between '1999-2-01' and 16 | date_add(cast('1999-2-01' as date), 60) 17 | and ws1.ws_ship_date_sk = d_date_sk 18 | and ws1.ws_ship_addr_sk = ca_address_sk 19 | and ca_state = 'IL' 20 | and ws1.ws_web_site_sk = web_site_sk 21 | and web_company_name = 'pri' 22 | and ws1.ws_order_number in (select ws_order_number 23 | from ws_wh) 24 | and ws1.ws_order_number in (select wr_order_number 25 | from web_returns, 26 | ws_wh 27 | where wr_order_number = ws_wh.ws_order_number) 28 | order by count(distinct ws_order_number) 29 | limit 100; 30 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query96.sql: -------------------------------------------------------------------------------- 1 | insert into query96 2 | select count(*) 3 | from store_sales 4 | , household_demographics 5 | , time_dim 6 | , store 7 | where ss_sold_time_sk = time_dim.t_time_sk 8 | and ss_hdemo_sk = household_demographics.hd_demo_sk 9 | and ss_store_sk = s_store_sk 10 | and time_dim.t_hour = 20 11 | and time_dim.t_minute >= 30 12 | and household_demographics.hd_dep_count = 7 13 | and store.s_store_name = 'ese' 14 | order by count(*) 15 | limit 100; 16 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query97.sql: -------------------------------------------------------------------------------- 1 | with ssci as ( 2 | select ss_customer_sk customer_sk 3 | , ss_item_sk item_sk 4 | from store_sales, 5 | date_dim 6 | where ss_sold_date_sk = d_date_sk 7 | and d_month_seq between 1200 and 1200 + 11 8 | group by ss_customer_sk 9 | , ss_item_sk), 10 | csci as ( 11 | select cs_bill_customer_sk customer_sk 12 | , cs_item_sk item_sk 13 | from catalog_sales, 14 | date_dim 15 | where cs_sold_date_sk = d_date_sk 16 | and d_month_seq between 1200 and 1200 + 11 17 | group by cs_bill_customer_sk 18 | , cs_item_sk) 19 | insert into query97 20 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 21 | , sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 22 | , sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 23 | from ssci 24 | full outer join csci on (ssci.customer_sk = csci.customer_sk 25 | and ssci.item_sk = csci.item_sk) 26 | limit 100; 27 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query98.sql: -------------------------------------------------------------------------------- 1 | insert into query98 2 | select i_item_id 3 | , i_item_desc 4 | , i_category 5 | , i_class 6 | , i_current_price 7 | , sum(ss_ext_sales_price) as itemrevenue 8 | , sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from store_sales 11 | , item 12 | , date_dim 13 | where ss_item_sk = i_item_sk 14 | and i_category in ('Sports', 'Books', 'Home') 15 | and ss_sold_date_sk = d_date_sk 16 | and d_date between cast('1999-02-22' as date) 17 | and date_add(cast('1999-02-22' as date), 30) 18 | group by i_item_id 19 | , i_item_desc 20 | , i_category 21 | , i_class 22 | , i_current_price 23 | 
order by i_category 24 | , i_class 25 | , i_item_id 26 | , i_item_desc 27 | , revenueratio; 28 | -------------------------------------------------------------------------------- /sqllineage/data/tpcds/query99.sql: -------------------------------------------------------------------------------- 1 | insert into query99 2 | select substr(w_warehouse_name, 1, 20) 3 | , sm_type 4 | , cc_name 5 | , sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30) then 1 else 0 end) as 30_days 6 | , sum(case 7 | when (cs_ship_date_sk - cs_sold_date_sk > 30) and 8 | (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 9 | else 0 end) as 31_60_days 10 | , sum(case 11 | when (cs_ship_date_sk - cs_sold_date_sk > 60) and 12 | (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 13 | else 0 end) as 61_90_days 14 | , sum(case 15 | when (cs_ship_date_sk - cs_sold_date_sk > 90) and 16 | (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 17 | else 0 end) as 91_120_days 18 | , sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as above120_days 19 | from catalog_sales 20 | , warehouse 21 | , ship_mode 22 | , call_center 23 | , date_dim 24 | where d_month_seq between 1200 and 1200 + 11 25 | and cs_ship_date_sk = d_date_sk 26 | and cs_warehouse_sk = w_warehouse_sk 27 | and cs_ship_mode_sk = sm_ship_mode_sk 28 | and cs_call_center_sk = cc_call_center_sk 29 | group by substr(w_warehouse_name, 1, 20) 30 | , sm_type 31 | , cc_name 32 | order by substr(w_warehouse_name, 1, 20) 33 | , sm_type 34 | , cc_name 35 | limit 100; 36 | -------------------------------------------------------------------------------- /sqllineage/exceptions.py: -------------------------------------------------------------------------------- 1 | class SQLLineageException(Exception): 2 | """Base Exception for SQLLineage""" 3 | 4 | 5 | class UnsupportedStatementException(SQLLineageException): 6 | """Raised for SQL statement that SQLLineage doesn't support analyzing""" 7 | 8 | 9 | class InvalidSyntaxException(SQLLineageException): 10 | """Raised for SQL statement that parser cannot parse""" 11 | 12 | 13 | class MetaDataProviderException(SQLLineageException): 14 | """Raised for MetaDataProvider errors""" 15 | 16 | 17 | class ConfigException(SQLLineageException): 18 | """Raised for configuration errors""" 19 | -------------------------------------------------------------------------------- /sqllineage/io.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from networkx import DiGraph 4 | 5 | 6 | def to_cytoscape(graph: DiGraph, compound=False) -> list[dict[str, dict[str, Any]]]: 7 | """ 8 | compound nodes is used to group nodes together to their parent. 9 | See https://js.cytoscape.org/#notation/compound-nodes for reference. 
10 | """ 11 | if compound: 12 | parents_dict = { 13 | node.parent: { 14 | "name": str(node.parent) if node.parent is not None else "", 15 | "type": ( 16 | type(node.parent).__name__ 17 | if node.parent is not None 18 | else "Table or SubQuery" 19 | ), 20 | } 21 | for node in graph.nodes 22 | } 23 | nodes = [ 24 | { 25 | "data": { 26 | "id": str(node), 27 | "parent": parents_dict[node.parent]["name"], 28 | "parent_candidates": [ 29 | {"name": str(p), "type": type(p).__name__} 30 | for p in node.parent_candidates 31 | ], 32 | "type": type(node).__name__, 33 | } 34 | } 35 | for node in graph.nodes 36 | ] 37 | nodes += [ 38 | {"data": {"id": attr["name"], "type": attr["type"]}} 39 | for _, attr in parents_dict.items() 40 | ] 41 | else: 42 | nodes = [{"data": {"id": str(node)}} for node in graph.nodes] 43 | edges: list[dict[str, dict[str, Any]]] = [ 44 | {"data": {"id": f"e{i}", "source": str(edge[0]), "target": str(edge[1])}} 45 | for i, edge in enumerate(graph.edges) 46 | ] 47 | return nodes + edges 48 | -------------------------------------------------------------------------------- /sqllineage/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/sqllineage/utils/__init__.py -------------------------------------------------------------------------------- /sqllineage/utils/constant.py: -------------------------------------------------------------------------------- 1 | class NodeTag: 2 | READ = "read" 3 | WRITE = "write" 4 | CTE = "cte" 5 | DROP = "drop" 6 | SOURCE_ONLY = "source_only" 7 | TARGET_ONLY = "target_only" 8 | SELFLOOP = "selfloop" 9 | 10 | 11 | class EdgeTag: 12 | INDEX = "index" 13 | 14 | 15 | class EdgeType: 16 | LINEAGE = "lineage" 17 | RENAME = "rename" 18 | HAS_COLUMN = "has_column" 19 | HAS_ALIAS = "has_alias" 20 | 21 | 22 | class LineageLevel: 23 | TABLE = "table" 24 | COLUMN = "column" 25 | -------------------------------------------------------------------------------- /sqllineage/utils/entities.py: -------------------------------------------------------------------------------- 1 | from typing import Any, NamedTuple, Optional, Union 2 | 3 | from sqllineage.core.models import Column, SubQuery, Table 4 | 5 | 6 | class SubQueryTuple(NamedTuple): 7 | parenthesis: Any 8 | alias: Optional[str] 9 | 10 | 11 | class ColumnQualifierTuple(NamedTuple): 12 | column: str 13 | qualifier: Optional[str] 14 | 15 | 16 | class AnalyzerContext(NamedTuple): 17 | # CTE queries that can be select from in current query context 18 | cte: Optional[set[SubQuery]] = None 19 | # table that current top-level query is writing to, subquery in case of subquery context 20 | write: Optional[set[Union[SubQuery, Table]]] = None 21 | # columns that write table specifies, used for `INSERT INTO x (col1, col2) SELECT` syntax 22 | write_columns: Optional[list[Column]] = None 23 | -------------------------------------------------------------------------------- /sqllineagejs/.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "ignorePatterns": ["build/"] 3 | } 4 | -------------------------------------------------------------------------------- /sqllineagejs/eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from '@eslint/js' 2 | import globals from 'globals' 3 | import reactHooks from 'eslint-plugin-react-hooks' 4 | import reactRefresh from 
'eslint-plugin-react-refresh' 5 | import importPlugin from 'eslint-plugin-import' 6 | import { defineConfig, globalIgnores } from 'eslint/config' 7 | 8 | /* eslint import/no-unused-modules: "off" */ 9 | export default defineConfig([ 10 | globalIgnores(['build']), 11 | { 12 | files: ['**/*.{js,jsx}'], 13 | plugins: { 14 | import: importPlugin, 15 | }, 16 | extends: [ 17 | js.configs.recommended, 18 | reactHooks.configs['recommended-latest'], 19 | reactRefresh.configs.vite, 20 | ], 21 | languageOptions: { 22 | ecmaVersion: 2020, 23 | globals: globals.browser, 24 | parserOptions: { 25 | ecmaVersion: 'latest', 26 | ecmaFeatures: { jsx: true }, 27 | sourceType: 'module', 28 | }, 29 | }, 30 | rules: { 31 | 'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }], 32 | 'import/no-unused-modules': [1, { unusedExports: true }], 33 | 'import/order': [ 34 | 'warn', 35 | { 36 | groups: ['builtin', 'external', 'internal', 'parent', 'sibling', 'index'], 37 | 'newlines-between': 'always', 38 | }, 39 | ], 40 | }, 41 | }, 42 | ]) 43 | -------------------------------------------------------------------------------- /sqllineagejs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 11 | SQLLineage 12 | 13 | 14 |
15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /sqllineagejs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sqllineagejs", 3 | "version": "1.5.5", 4 | "private": true, 5 | "type": "module", 6 | "dependencies": { 7 | "@emotion/react": "^11.14.0", 8 | "@emotion/styled": "^11.14.1", 9 | "@monaco-editor/react": "^4.7.0", 10 | "@mui/icons-material": "^7.2.0", 11 | "@mui/material": "^7.2.0", 12 | "@mui/x-tree-view": "^8.7.0", 13 | "@reduxjs/toolkit": "^2.8.2", 14 | "cytoscape": "^3.32.0", 15 | "cytoscape-dagre": "^2.3.3", 16 | "react": "^19.1.0", 17 | "react-cytoscapejs": "^2.0.0", 18 | "react-dom": "^19.1.0", 19 | "react-redux": "^9.2.0", 20 | "react-router-dom": "^7.6.3" 21 | }, 22 | "scripts": { 23 | "start": "vite", 24 | "build": "vite build", 25 | "preview": "vite preview", 26 | "deploy": "VITE_BACKEND_API=https://sqllineage.azurewebsites.net/api/sqllineage/ vite build --base=/sqllineage/ && gh-pages -d build", 27 | "lint": "eslint .", 28 | "format": "prettier --write \"src/**/*.{js,jsx,json,css,md}\"" 29 | }, 30 | "browserslist": { 31 | "production": [ 32 | ">0.2%", 33 | "not dead", 34 | "not op_mini all" 35 | ], 36 | "development": [ 37 | "last 1 chrome version", 38 | "last 1 firefox version", 39 | "last 1 safari version" 40 | ] 41 | }, 42 | "devDependencies": { 43 | "@eslint/js": "^9.32.0", 44 | "@types/react": "^19.1.8", 45 | "@types/react-dom": "^19.1.6", 46 | "@vitejs/plugin-react": "^4.7.0", 47 | "eslint": "^9.32.0", 48 | "eslint-plugin-import": "^2.32.0", 49 | "eslint-plugin-react-hooks": "^5.2.0", 50 | "eslint-plugin-react-refresh": "^0.4.20", 51 | "gh-pages": "^5.0.0", 52 | "globals": "^16.3.0", 53 | "prettier": "^3.6.2", 54 | "vite": "^7.1.5" 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /sqllineagejs/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/sqllineagejs/public/favicon.ico -------------------------------------------------------------------------------- /sqllineagejs/src/api/client.js: -------------------------------------------------------------------------------- 1 | import { BACKEND_API } from "../config.js"; 2 | 3 | // A tiny wrapper around fetch(), borrowed from 4 | // https://kentcdodds.com/blog/replace-axios-with-a-simple-custom-fetch-wrapper 5 | export function assemble_absolute_endpoint(relative_endpoint) { 6 | let api_prefix = BACKEND_API 7 | ? BACKEND_API 8 | : new URL(window.location.href).origin; 9 | return `${api_prefix}${relative_endpoint}`; 10 | } 11 | 12 | export async function client(endpoint, { body, ...customConfig } = {}) { 13 | const headers = { "Content-Type": "application/json" }; 14 | 15 | const config = { 16 | method: body ? "POST" : "GET", 17 | ...customConfig, 18 | headers: { 19 | ...headers, 20 | ...customConfig.headers, 21 | }, 22 | }; 23 | 24 | if (body) { 25 | config.body = JSON.stringify(body); 26 | } 27 | 28 | let data; 29 | const response = await window.fetch(endpoint, config); 30 | data = await response.json(); 31 | if (response.ok) { 32 | return data; 33 | } else { 34 | // 4XX or 5XX, try use response.json() first, if not then fallback to statusText 35 | return Promise.reject(data ? 
data : response.statusText); 36 | } 37 | } 38 | 39 | client.get = function (endpoint, customConfig = {}) { 40 | return client(endpoint, { ...customConfig, method: "GET" }); 41 | }; 42 | 43 | client.post = function (endpoint, body, customConfig = {}) { 44 | return client(endpoint, { ...customConfig, body }); 45 | }; 46 | -------------------------------------------------------------------------------- /sqllineagejs/src/app/store.js: -------------------------------------------------------------------------------- 1 | import { configureStore } from "@reduxjs/toolkit"; 2 | 3 | import directoryReducer from "../features/directory/directorySlice"; 4 | import editorReducer from "../features/editor/editorSlice"; 5 | 6 | export default configureStore({ 7 | reducer: { 8 | directory: directoryReducer, 9 | editor: editorReducer, 10 | }, 11 | }); 12 | -------------------------------------------------------------------------------- /sqllineagejs/src/config.js: -------------------------------------------------------------------------------- 1 | // base URL when deploying the frontend app 2 | export const BASE_URL = import.meta.env.BASE_URL; 3 | // backend API URL, used by the client to make API requests 4 | export const BACKEND_API = import.meta.env.VITE_BACKEND_API; 5 | -------------------------------------------------------------------------------- /sqllineagejs/src/features/directory/Directory.jsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect } from "react"; 2 | import { useDispatch, useSelector } from "react-redux"; 3 | import { Snackbar } from "@mui/material"; 4 | 5 | import { Loading } from "../widget/Loading"; 6 | import { LoadError } from "../widget/LoadError"; 7 | 8 | import { 9 | fetchRootDirectory, 10 | selectDirectory, 11 | setOpenNonSQLWarning, 12 | } from "./directorySlice"; 13 | import DirectoryTreeItem from "./DirectoryTreeItem"; 14 | 15 | export function Directory(props) { 16 | const dispatch = useDispatch(); 17 | const directoryState = useSelector(selectDirectory); 18 | 19 | useEffect(() => { 20 | if (directoryState.status === "idle") { 21 | let url = new URL(window.location.href); 22 | dispatch(fetchRootDirectory(Object.fromEntries(url.searchParams))); 23 | } 24 | }); 25 | 26 | if (directoryState.status === "loading") { 27 | return ; 28 | } else if (directoryState.status === "failed") { 29 | return ( 30 | 31 | ); 32 | } else { 33 | return ( 34 |
35 | 41 | { 49 | dispatch(setOpenNonSQLWarning(false)); 50 | }} 51 | message="Non SQL File Is Not Supported" 52 | /> 53 |
54 | ); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /sqllineagejs/src/features/directory/directorySlice.js: -------------------------------------------------------------------------------- 1 | import { createAsyncThunk, createSlice } from "@reduxjs/toolkit"; 2 | 3 | import { assemble_absolute_endpoint, client } from "../../api/client"; 4 | 5 | const initialState = { 6 | content: {}, 7 | status: "idle", 8 | error: null, 9 | openNonSQLWarning: false, 10 | }; 11 | 12 | export const DirectoryAPI = async (payload) => { 13 | return await client.post(assemble_absolute_endpoint("/directory"), payload); 14 | }; 15 | 16 | export const fetchRootDirectory = createAsyncThunk( 17 | "directory/fetchDirectory", 18 | DirectoryAPI, 19 | ); 20 | 21 | export const directorySlice = createSlice({ 22 | name: "directory", 23 | initialState, 24 | reducers: { 25 | setOpenNonSQLWarning(state, action) { 26 | state.openNonSQLWarning = action.payload; 27 | }, 28 | }, 29 | extraReducers: (builder) => { 30 | builder 31 | .addCase(fetchRootDirectory.pending, (state) => { 32 | state.status = "loading"; 33 | }) 34 | .addCase(fetchRootDirectory.fulfilled, (state, action) => { 35 | state.status = "succeeded"; 36 | state.content = action.payload; 37 | }) 38 | .addCase(fetchRootDirectory.rejected, (state, action) => { 39 | state.status = "failed"; 40 | state.content = action.error.message; 41 | }); 42 | }, 43 | }); 44 | 45 | export const selectDirectory = (state) => state.directory; 46 | export const { setOpenNonSQLWarning } = directorySlice.actions; 47 | 48 | export default directorySlice.reducer; 49 | -------------------------------------------------------------------------------- /sqllineagejs/src/features/editor/DAGDesc.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { useSelector } from "react-redux"; 3 | import { Editor as MonacoEditor } from "@monaco-editor/react"; 4 | 5 | import { LoadError } from "../widget/LoadError"; 6 | import { Loading } from "../widget/Loading"; 7 | 8 | import { selectEditor } from "./editorSlice"; 9 | 10 | export function DAGDesc(props) { 11 | const editorState = useSelector(selectEditor); 12 | 13 | if (editorState.dagStatus === "loading") { 14 | return ; 15 | } else if (editorState.dagStatus === "failed") { 16 | return ( 17 | 24 | ); 25 | } else { 26 | const options = { 27 | minimap: { enabled: false }, 28 | readOnly: true, 29 | wordWrap: "on", 30 | automaticLayout: true, 31 | }; 32 | return ( 33 | 39 | ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /sqllineagejs/src/features/widget/LoadError.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Box } from "@mui/material"; 3 | import EmojiPeopleOutlinedIcon from "@mui/icons-material/EmojiPeopleOutlined"; 4 | import ErrorOutlinedIcon from "@mui/icons-material/Error"; 5 | 6 | export function LoadError(props) { 7 | let Icon = props.info ? EmojiPeopleOutlinedIcon : ErrorOutlinedIcon; 8 | return ( 9 | 15 | 16 | 17 | {props.message 18 | ? props.message 19 | .split("\n") 20 | .map((line, idx) =>

{line}

) 21 | : ""} 22 |
23 |
24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /sqllineagejs/src/features/widget/Loading.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import Fade from "@mui/material/Fade"; 3 | import CircularProgress from "@mui/material/CircularProgress"; 4 | import { Box } from "@mui/material"; 5 | 6 | export function Loading(props) { 7 | return ( 8 | 14 | 21 | 22 | 23 | 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /sqllineagejs/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: 4 | -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", 5 | "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif; 6 | -webkit-font-smoothing: antialiased; 7 | -moz-osx-font-smoothing: grayscale; 8 | } 9 | 10 | code { 11 | font-family: 12 | source-code-pro, Menlo, Monaco, Consolas, "Courier New", monospace; 13 | } 14 | 15 | /* width */ 16 | ::-webkit-scrollbar { 17 | width: 5px; 18 | height: 5px; 19 | } 20 | 21 | /* Track */ 22 | ::-webkit-scrollbar-track { 23 | background: #f1f1f1; 24 | } 25 | 26 | /* Handle */ 27 | ::-webkit-scrollbar-thumb { 28 | background: #888; 29 | } 30 | 31 | /* Handle on hover */ 32 | ::-webkit-scrollbar-thumb:hover { 33 | background: #555; 34 | } 35 | -------------------------------------------------------------------------------- /sqllineagejs/src/main.jsx: -------------------------------------------------------------------------------- 1 | import { createRoot } from "react-dom/client"; 2 | import React from "react"; 3 | import { Provider } from "react-redux"; 4 | 5 | import "./index.css"; 6 | import App from "./App"; 7 | import store from "./app/store"; 8 | 9 | const root = createRoot(document.getElementById("root")); 10 | 11 | root.render( 12 | 13 | 14 | 15 | 16 | , 17 | ); 18 | -------------------------------------------------------------------------------- /sqllineagejs/vite.config.js: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import react from '@vitejs/plugin-react' 3 | 4 | // https://vite.dev/config/ 5 | export default defineConfig({ 6 | root: '.', 7 | build: { 8 | outDir: './build', 9 | emptyOutDir: true, 10 | }, 11 | plugins: [react()], 12 | }) 13 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/tests/__init__.py -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/tests/core/__init__.py -------------------------------------------------------------------------------- /tests/core/test_cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from unittest.mock import patch 4 | 5 | import pytest 6 | 7 | from sqllineage.cli import main 8 | from sqllineage.config import SQLLineageConfig 9 | 10 | 11 | @patch("socketserver.BaseServer.serve_forever") 12 | def test_cli_dummy(_): 13 | main([]) 
14 | main(["-e", "select * from dual"]) 15 | main(["-e", "insert into foo select * from dual", "-l", "column"]) 16 | for dirname, _, files in os.walk(SQLLineageConfig.DIRECTORY): 17 | if len(files) > 0: 18 | sql_file = str(Path(dirname).joinpath(Path(files[0]))) 19 | main(["-f", sql_file]) 20 | main(["-e", "select * from dual", "-f", sql_file]) 21 | main(["-f", sql_file, "-g"]) 22 | main(["-f", sql_file, "--silent_mode"]) 23 | main(["-f", sql_file, "--sqlalchemy_url=sqlite:///:memory:"]) 24 | main(["--sqlalchemy_url=sqlite:///:memory:", "-g"]) 25 | break 26 | main(["-g"]) 27 | main(["-ds"]) 28 | main( 29 | [ 30 | "-e", 31 | "insert overwrite table tab1 select * from tab1 union select * from tab2", 32 | "-g", 33 | ] 34 | ) 35 | 36 | 37 | def test_file_exception(): 38 | for args in (["-f", str(Path().absolute())], ["-f", "nonexist_file"]): 39 | with pytest.raises(SystemExit) as e: 40 | main(args) 41 | assert e.value.code == 1 42 | 43 | 44 | @patch("builtins.open", side_effect=PermissionError()) 45 | def test_file_permission_error(_): 46 | with pytest.raises(SystemExit) as e: 47 | main(["-f", __file__]) 48 | assert e.value.code == 1 49 | -------------------------------------------------------------------------------- /tests/core/test_exception.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | from sqllineage import SQLPARSE_DIALECT 7 | from sqllineage.exceptions import ( 8 | InvalidSyntaxException, 9 | SQLLineageException, 10 | UnsupportedStatementException, 11 | ) 12 | from sqllineage.runner import LineageRunner 13 | 14 | 15 | def test_select_without_table(): 16 | with pytest.raises(SQLLineageException): 17 | LineageRunner("select * from where foo='bar'")._eval() 18 | with pytest.raises(SQLLineageException): 19 | LineageRunner("select * from where foo='bar'", dialect=SQLPARSE_DIALECT)._eval() 20 | 21 | 22 | def test_full_unparsable_query_in_sqlfluff(): 23 | with pytest.raises(InvalidSyntaxException): 24 | LineageRunner("WRONG SELECT FROM tab1")._eval() 25 | 26 | 27 | def test_partial_unparsable_query_in_sqlfluff(): 28 | with pytest.raises(InvalidSyntaxException): 29 | LineageRunner("SELECT * FROM tab1 AS FULL FULL OUTER JOIN tab2")._eval() 30 | 31 | 32 | def test_partial_unparsable_query_in_sqlfluff_with_tsql_batch(): 33 | sql = """SELECT * 34 | INTO tgt 35 | FROM tab1 src1 AS src1 36 | CROSS JOIN tab2 AS src2""" 37 | with pytest.raises(InvalidSyntaxException): 38 | LineageRunner(sql, dialect="tsql")._eval() 39 | 40 | 41 | def test_unsupported_query_type_in_sqlfluff(): 42 | with pytest.raises(UnsupportedStatementException): 43 | LineageRunner("CREATE UNIQUE INDEX title_idx ON films (title)")._eval() 44 | 45 | 46 | def test_deprecation_warning_in_sqlparse(): 47 | with pytest.warns(DeprecationWarning): 48 | LineageRunner("SELECT * FROM DUAL", dialect=SQLPARSE_DIALECT)._eval() 49 | 50 | 51 | def test_syntax_warning_no_semicolon_in_tsql(): 52 | with pytest.warns(SyntaxWarning): 53 | LineageRunner( 54 | """SELECT * FROM foo 55 | SELECT * FROM bar""", 56 | dialect="tsql", 57 | )._eval() 58 | 59 | 60 | @patch.dict(os.environ, {"SQLLINEAGE_TSQL_NO_SEMICOLON": "TRUE"}) 61 | def test_user_warning_enable_tsql_no_semicolon_with_other_dialect(): 62 | with pytest.warns(UserWarning): 63 | LineageRunner( 64 | """SELECT * FROM foo; 65 | SELECT * FROM bar""", 66 | )._eval() 67 | -------------------------------------------------------------------------------- /tests/core/test_holder.py: 
-------------------------------------------------------------------------------- 1 | from sqllineage.core.holders import StatementLineageHolder 2 | 3 | 4 | def test_dummy(): 5 | assert str(StatementLineageHolder()) == repr(StatementLineageHolder()) 6 | -------------------------------------------------------------------------------- /tests/core/test_metadata_provider.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from sqllineage.core.metadata.sqlalchemy import SQLAlchemyMetaDataProvider 6 | from sqllineage.exceptions import MetaDataProviderException 7 | 8 | 9 | def test_sqlalchemy_metadata_provider_connection_fail(): 10 | # connect to a directory as sqlite db, which is not possible. Simulate connection failure 11 | with pytest.raises(MetaDataProviderException): 12 | SQLAlchemyMetaDataProvider(f"sqlite:///{os.path.dirname(__file__)}") 13 | 14 | 15 | def test_sqlalchemy_metadata_provider_driver_not_install(): 16 | # use an unknown driver to connect. Simulate driver not installed 17 | with pytest.raises(MetaDataProviderException): 18 | SQLAlchemyMetaDataProvider("sqlite+unknown_driver:///:memory:") 19 | 20 | 21 | def test_sqlalchemy_metadata_provider_query_fail(): 22 | provider = SQLAlchemyMetaDataProvider("sqlite:///:memory:") 23 | assert ( 24 | provider._get_table_columns("non_existing_schema", "non_existing_table") == [] 25 | ) 26 | assert provider._get_table_columns("main", "non_existing_table") == [] 27 | -------------------------------------------------------------------------------- /tests/core/test_models.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sqlparse.sql import Parenthesis 3 | 4 | from sqllineage.core.models import Column, Path, Schema, SubQuery, Table 5 | from sqllineage.exceptions import SQLLineageException 6 | 7 | 8 | def test_repr_dummy(): 9 | assert repr(Schema()) 10 | assert repr(Table("")) 11 | assert repr(Table("a.b.c")) 12 | assert repr(SubQuery(Parenthesis(), Parenthesis().value, "")) 13 | assert repr(Column("a.b")) 14 | assert repr(Path("")) 15 | with pytest.raises(SQLLineageException): 16 | Table("a.b.c.d") 17 | with pytest.warns(Warning): 18 | Table("a.b", Schema("c")) 19 | 20 | 21 | def test_hash_eq(): 22 | assert Schema("a") == Schema("a") 23 | assert len({Schema("a"), Schema("a")}) == 1 24 | assert Table("a") == Table("a") 25 | assert len({Table("a"), Table("a")}) == 1 26 | 27 | 28 | def test_of_dummy(): 29 | with pytest.raises(NotImplementedError): 30 | Column.of("") 31 | with pytest.raises(NotImplementedError): 32 | Table.of("") 33 | with pytest.raises(NotImplementedError): 34 | SubQuery.of("", None) 35 | -------------------------------------------------------------------------------- /tests/core/test_parser.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | from sqlfluff.core import Linter 4 | 5 | from sqllineage.core.parser.sqlfluff.models import SqlFluffColumn 6 | from sqllineage.core.parser.sqlfluff.utils import find_from_expression_element 7 | 8 | 9 | def test_column_extract_source_columns(): 10 | segment_mock = Mock() 11 | segment_mock.type = "" 12 | assert [] == SqlFluffColumn._extract_source_columns(segment_mock) 13 | 14 | 15 | def test_return_none_find_from_expression_element(): 16 | file_segment = Linter(dialect="ansi").parse_string("TRUNCATE TABLE tab").tree 17 | assert find_from_expression_element(file_segment) is None 18 | 
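
The metadata provider tests above only exercise failure modes (bad URL, missing driver, unknown table). As context, here is a minimal usage sketch of how such a provider is combined with LineageRunner outside pytest. It is not part of the test suite: the metadata_provider keyword argument and the source_tables()/target_tables()/get_column_lineage() calls are assumed from the project's public API, and the sqlite URL simply mirrors the in-memory database used in test_metadata_provider.py above.

from sqllineage.core.metadata.sqlalchemy import SQLAlchemyMetaDataProvider
from sqllineage.runner import LineageRunner

# In practice the URL would point at a real warehouse; an in-memory sqlite
# database returns no column metadata, so wildcard columns cannot be expanded.
provider = SQLAlchemyMetaDataProvider("sqlite:///:memory:")

sql = "INSERT INTO db.tgt SELECT id, name FROM db.src"
runner = LineageRunner(sql, dialect="ansi", metadata_provider=provider)

print(runner.source_tables())  # tables read by the statement
print(runner.target_tables())  # tables written by the statement
for path in runner.get_column_lineage():
    # each path is a tuple of Column objects ordered from source to target
    print(" -> ".join(str(col) for col in path))
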
-------------------------------------------------------------------------------- /tests/sql/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/tests/sql/__init__.py -------------------------------------------------------------------------------- /tests/sql/column/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/tests/sql/column/__init__.py -------------------------------------------------------------------------------- /tests/sql/column/multiple_statements/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/tests/sql/column/multiple_statements/__init__.py -------------------------------------------------------------------------------- /tests/sql/column/multiple_statements/test_session_metadata.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from sqllineage.core.metadata_provider import MetaDataProvider 4 | from sqllineage.utils.entities import ColumnQualifierTuple 5 | 6 | from ....helpers import assert_column_lineage_equal, generate_metadata_providers 7 | 8 | providers = generate_metadata_providers( 9 | { 10 | "db1.table1": ["id", "a", "b", "c", "d"], 11 | "db2.table2": ["id", "h", "i", "j", "k"], 12 | "db3.table3": ["pk", "p", "q", "r"], 13 | } 14 | ) 15 | 16 | 17 | @pytest.mark.parametrize("provider", providers) 18 | def test_do_not_register_session_metadata_for_update_statement( 19 | provider: MetaDataProvider, 20 | ): 21 | sql = """UPDATE db1.table1 SET a = 1; 22 | 23 | CREATE TABLE db1.foo AS 24 | SELECT a, b, c 25 | FROM db1.table1 tab1 26 | INNER JOIN db1.table2 tab2 ON tab1.id = tab2.id 27 | """ 28 | assert_column_lineage_equal( 29 | sql, 30 | [ 31 | ( 32 | ColumnQualifierTuple("a", "db1.table1"), 33 | ColumnQualifierTuple("a", "db1.foo"), 34 | ), 35 | ( 36 | ColumnQualifierTuple("b", "db1.table1"), 37 | ColumnQualifierTuple("b", "db1.foo"), 38 | ), 39 | ( 40 | ColumnQualifierTuple("c", "db1.table1"), 41 | ColumnQualifierTuple("c", "db1.foo"), 42 | ), 43 | ], 44 | metadata_provider=provider, 45 | ) 46 | -------------------------------------------------------------------------------- /tests/sql/column/test_column_select_case_when.py: -------------------------------------------------------------------------------- 1 | from sqllineage.utils.entities import ColumnQualifierTuple 2 | 3 | from ...helpers import assert_column_lineage_equal 4 | 5 | 6 | def test_select_column_using_case_when(): 7 | sql = """INSERT INTO tab1 8 | SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' END 9 | FROM tab2""" 10 | assert_column_lineage_equal( 11 | sql, 12 | [ 13 | ( 14 | ColumnQualifierTuple("col1", "tab2"), 15 | ColumnQualifierTuple( 16 | "CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' END", "tab1" 17 | ), 18 | ), 19 | ], 20 | ) 21 | sql = """INSERT INTO tab1 22 | SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' END AS col2 23 | FROM tab2""" 24 | assert_column_lineage_equal( 25 | sql, 26 | [(ColumnQualifierTuple("col1", "tab2"), ColumnQualifierTuple("col2", "tab1"))], 27 | ) 28 | 29 | 30 | def test_select_column_using_case_when_with_subquery(): 31 | sql = """INSERT INTO tab1 32 | SELECT CASE WHEN (SELECT avg(col1) 
FROM tab3) > 0 AND col2 = 1 THEN (SELECT avg(col1) FROM tab3) ELSE 0 END AS col1 33 | FROM tab4""" 34 | assert_column_lineage_equal( 35 | sql, 36 | [ 37 | ( 38 | ColumnQualifierTuple("col2", "tab4"), 39 | ColumnQualifierTuple("col1", "tab1"), 40 | ), 41 | ( 42 | ColumnQualifierTuple("col1", "tab3"), 43 | ColumnQualifierTuple("col1", "tab1"), 44 | ), 45 | ], 46 | ) 47 | 48 | 49 | def test_select_column_using_multiple_case_when_with_subquery(): 50 | sql = """INSERT INTO tab1 51 | SELECT CASE 52 | WHEN (SELECT avg(col1) FROM tab3) > 0 AND col2 = 1 THEN (SELECT avg(col1) FROM tab3) 53 | WHEN (SELECT avg(col1) FROM tab3) > 0 AND col2 = 1 THEN (SELECT avg(col1) FROM tab5) ELSE 0 END AS col1 54 | FROM tab4""" 55 | assert_column_lineage_equal( 56 | sql, 57 | [ 58 | ( 59 | ColumnQualifierTuple("col2", "tab4"), 60 | ColumnQualifierTuple("col1", "tab1"), 61 | ), 62 | ( 63 | ColumnQualifierTuple("col1", "tab3"), 64 | ColumnQualifierTuple("col1", "tab1"), 65 | ), 66 | ( 67 | ColumnQualifierTuple("col1", "tab5"), 68 | ColumnQualifierTuple("col1", "tab1"), 69 | ), 70 | ], 71 | ) 72 | -------------------------------------------------------------------------------- /tests/sql/column/test_column_select_column_dialect_specific.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from sqllineage.utils.entities import ColumnQualifierTuple 4 | 5 | from ...helpers import assert_column_lineage_equal 6 | 7 | 8 | @pytest.mark.parametrize("dialect", ["tsql"]) 9 | def test_tsql_assignment_operator(dialect: str): 10 | """ 11 | Assignment Operator is a Transact-SQL specific feature, used interchangeably with column alias 12 | https://learn.microsoft.com/en-us/sql/t-sql/language-elements/assignment-operator-transact-sql?view=sql-server-ver15 13 | """ 14 | sql = """INSERT INTO foo 15 | SELECT FirstColumnHeading = 'xyz', 16 | SecondColumnHeading = ProductID 17 | FROM Production.Product""" 18 | assert_column_lineage_equal( 19 | sql, 20 | [ 21 | ( 22 | ColumnQualifierTuple("ProductID", "Production.Product"), 23 | ColumnQualifierTuple("SecondColumnHeading", "foo"), 24 | ) 25 | ], 26 | dialect=dialect, 27 | test_sqlparse=False, 28 | ) 29 | 30 | 31 | @pytest.mark.parametrize("dialect", ["teradata"]) 32 | def test_teradata_title_phrase(dialect: str): 33 | """ 34 | The TITLE phrase of a CREATE TABLE, ALTER TABLE, or SELECT statement gives a name to a column heading. 35 | TITLE is a Teradata extension to the ANSI SQL:2011 standard. 36 | It is used for display formatting and should be ignored for lineage purposes. 
37 | https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/SQL-Data-Types-and-Literals/Data-Type-Formats-and-Format-Phrases/TITLE/Examples/Example-Using-the-TITLE-Phrase-in-a-SELECT-Statement 38 | """ 39 | sql = """CREATE VIEW foo AS 40 | SELECT Name, DOB (TITLE 'Birthdate') 41 | FROM Employee;""" 42 | assert_column_lineage_equal( 43 | sql, 44 | [ 45 | ( 46 | ColumnQualifierTuple("Name", "Employee"), 47 | ColumnQualifierTuple("Name", "foo"), 48 | ), 49 | ( 50 | ColumnQualifierTuple("DOB", "Employee"), 51 | ColumnQualifierTuple("DOB", "foo"), 52 | ), 53 | ], 54 | dialect=dialect, 55 | test_sqlparse=False, 56 | ) 57 | -------------------------------------------------------------------------------- /tests/sql/column/test_column_select_union.py: -------------------------------------------------------------------------------- 1 | from sqllineage.utils.entities import ColumnQualifierTuple 2 | 3 | from ...helpers import assert_column_lineage_equal 4 | 5 | 6 | def test_column_reference_using_union(): 7 | sql = """INSERT INTO tab3 8 | SELECT col1 9 | FROM tab1 10 | UNION ALL 11 | SELECT col1 12 | FROM tab2""" 13 | assert_column_lineage_equal( 14 | sql, 15 | [ 16 | ( 17 | ColumnQualifierTuple("col1", "tab1"), 18 | ColumnQualifierTuple("col1", "tab3"), 19 | ), 20 | ( 21 | ColumnQualifierTuple("col1", "tab2"), 22 | ColumnQualifierTuple("col1", "tab3"), 23 | ), 24 | ], 25 | ) 26 | sql = """INSERT INTO tab3 27 | SELECT col1 28 | FROM tab1 29 | UNION 30 | SELECT col1 31 | FROM tab2""" 32 | assert_column_lineage_equal( 33 | sql, 34 | [ 35 | ( 36 | ColumnQualifierTuple("col1", "tab1"), 37 | ColumnQualifierTuple("col1", "tab3"), 38 | ), 39 | ( 40 | ColumnQualifierTuple("col1", "tab2"), 41 | ColumnQualifierTuple("col1", "tab3"), 42 | ), 43 | ], 44 | ) 45 | 46 | 47 | def test_union_inside_cte(): 48 | sql = """INSERT INTO dataset.target WITH temp_cte AS (SELECT col1 FROM dataset.tab1 UNION ALL 49 | SELECT col1 FROM dataset.tab2) SELECT col1 FROM temp_cte""" 50 | assert_column_lineage_equal( 51 | sql, 52 | [ 53 | ( 54 | ColumnQualifierTuple("col1", "dataset.tab1"), 55 | ColumnQualifierTuple("col1", "dataset.target"), 56 | ), 57 | ( 58 | ColumnQualifierTuple("col1", "dataset.tab2"), 59 | ColumnQualifierTuple("col1", "dataset.target"), 60 | ), 61 | ], 62 | ) 63 | 64 | 65 | def test_union_with_subquery(): 66 | sql = """INSERT INTO tab3 67 | SELECT sq1.id 68 | FROM (SELECT id 69 | FROM tab1) sq1 70 | UNION ALL 71 | SELECT sq2.id 72 | FROM (SELECT id 73 | FROM tab2) sq2""" 74 | assert_column_lineage_equal( 75 | sql, 76 | [ 77 | ( 78 | ColumnQualifierTuple("id", "tab1"), 79 | ColumnQualifierTuple("id", "tab3"), 80 | ), 81 | ( 82 | ColumnQualifierTuple("id", "tab2"), 83 | ColumnQualifierTuple("id", "tab3"), 84 | ), 85 | ], 86 | ) 87 | -------------------------------------------------------------------------------- /tests/sql/column/test_metadata_target_column.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from sqllineage.core.metadata_provider import MetaDataProvider 4 | from sqllineage.utils.entities import ColumnQualifierTuple 5 | 6 | from ...helpers import assert_column_lineage_equal, generate_metadata_providers 7 | 8 | providers = generate_metadata_providers( 9 | { 10 | "ods.source_tab": ["day_id", "user_id", "name"], 11 | "ods.target_tab": ["day_no", "user_code", "user_name"], 12 | } 13 | ) 14 | 15 | 16 | @pytest.mark.parametrize("provider", providers) 17 | def test_metadata_target_column(provider: MetaDataProvider): 18 | sql = """insert into 
ods.target_tab select day_id as acct_id, user_id as xxx, name as yyy from ods.source_tab""" 19 | assert_column_lineage_equal( 20 | sql=sql, 21 | column_lineages=[ 22 | ( 23 | ColumnQualifierTuple("name", "ods.source_tab"), 24 | ColumnQualifierTuple("user_name", "ods.target_tab"), 25 | ), 26 | ( 27 | ColumnQualifierTuple("day_id", "ods.source_tab"), 28 | ColumnQualifierTuple("day_no", "ods.target_tab"), 29 | ), 30 | ( 31 | ColumnQualifierTuple("user_id", "ods.source_tab"), 32 | ColumnQualifierTuple("user_code", "ods.target_tab"), 33 | ), 34 | ], 35 | metadata_provider=provider, 36 | test_sqlparse=False, 37 | ) 38 | 39 | 40 | @pytest.mark.parametrize("provider", providers) 41 | def test_metadata_target_column_cte(provider: MetaDataProvider): 42 | sql = """ 43 | INSERT INTO ods.target_tab 44 | WITH cte_table AS (SELECT day_id as acct_id, user_id as xxx, name as yyy FROM ods.source_tab) 45 | SELECT acct_id, xxx, yyy FROM cte_table""" 46 | assert_column_lineage_equal( 47 | sql=sql, 48 | column_lineages=[ 49 | ( 50 | ColumnQualifierTuple("user_id", "ods.source_tab"), 51 | ColumnQualifierTuple("user_code", "ods.target_tab"), 52 | ), 53 | ( 54 | ColumnQualifierTuple("day_id", "ods.source_tab"), 55 | ColumnQualifierTuple("day_no", "ods.target_tab"), 56 | ), 57 | ( 58 | ColumnQualifierTuple("name", "ods.source_tab"), 59 | ColumnQualifierTuple("user_name", "ods.target_tab"), 60 | ), 61 | ], 62 | metadata_provider=provider, 63 | test_sqlparse=False, 64 | ) 65 | -------------------------------------------------------------------------------- /tests/sql/table/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/tests/sql/table/__init__.py -------------------------------------------------------------------------------- /tests/sql/table/multiple_statements/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/tests/sql/table/multiple_statements/__init__.py -------------------------------------------------------------------------------- /tests/sql/table/multiple_statements/test_split.py: -------------------------------------------------------------------------------- 1 | from sqllineage.utils.helpers import split 2 | 3 | 4 | def test_split_statements(): 5 | sql = "SELECT * FROM tab1; SELECT * FROM tab2;" 6 | assert len(split(sql)) == 2 7 | 8 | 9 | def test_split_statements_with_heading_and_ending_new_line(): 10 | sql = "\nSELECT * FROM tab1;\nSELECT * FROM tab2;\n" 11 | assert len(split(sql)) == 2 12 | 13 | 14 | def test_split_statements_with_comment(): 15 | sql = """SELECT 1; 16 | 17 | -- SELECT 2;""" 18 | assert len(split(sql)) == 1 19 | 20 | 21 | def test_split_statements_with_show_create_table(): 22 | sql = """SELECT 1; 23 | 24 | SHOW CREATE TABLE tab1;""" 25 | assert len(split(sql)) == 2 26 | 27 | 28 | def test_split_statements_with_desc(): 29 | sql = """SELECT 1; 30 | 31 | DESC tab1;""" 32 | assert len(split(sql)) == 2 33 | 34 | 35 | def test_split_statement_ends_with_multiple_semicolons(): 36 | sql = "SELECT 1;;;" 37 | assert len(split(sql)) == 1 38 | -------------------------------------------------------------------------------- /tests/sql/table/multiple_statements/test_tmp_table.py: -------------------------------------------------------------------------------- 1 | from ....helpers import assert_table_lineage_equal 2 | 3 | 4 | def 
test_drop_tmp_tab_after_create(): 5 | sql = """CREATE TABLE tab_a AS 6 | SELECT * 7 | FROM tab_b; 8 | INSERT INTO tab_c 9 | SELECT * 10 | FROM tab_a; 11 | DROP TABLE tab_a;""" 12 | assert_table_lineage_equal(sql, {"tab_b"}, {"tab_c"}) 13 | 14 | 15 | def test_new_create_tab_as_tmp_table(): 16 | sql = """CREATE TABLE tab_a AS 17 | SELECT * 18 | FROM tab_b; 19 | CREATE TABLE tab_c AS 20 | SELECT * 21 | FROM tab_a;""" 22 | assert_table_lineage_equal(sql, {"tab_b"}, {"tab_c"}) 23 | 24 | 25 | def test_create_after_drop(): 26 | assert_table_lineage_equal( 27 | "DROP TABLE IF EXISTS tab1; CREATE TABLE IF NOT EXISTS tab1 AS SELECT 1", 28 | None, 29 | {"tab1"}, 30 | ) 31 | 32 | 33 | def test_drop_after_create(): 34 | assert_table_lineage_equal( 35 | "CREATE TABLE IF NOT EXISTS tab1 AS SELECT 1; DROP TABLE IF EXISTS tab1", 36 | None, 37 | None, 38 | ) 39 | 40 | 41 | def test_alter_target_table_name(): 42 | assert_table_lineage_equal( 43 | "INSERT INTO tab1 SELECT * FROM tab2; ALTER TABLE tab1 RENAME TO tab3;", 44 | {"tab2"}, 45 | {"tab3"}, 46 | ) 47 | -------------------------------------------------------------------------------- /tests/sql/table/multiple_statements/test_tsql_no_semicolon.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | from ....helpers import assert_table_lineage_equal 7 | 8 | 9 | @patch.dict(os.environ, {"SQLLINEAGE_TSQL_NO_SEMICOLON": "TRUE"}) 10 | @pytest.mark.parametrize("dialect", ["tsql"]) 11 | def test_tsql_multi_statement_no_semicolon(dialect: str): 12 | """ 13 | tsql multiple statements without explicit semicolon as splitter. 14 | """ 15 | sql = """insert into tab1 select * from foo 16 | insert into tab2 select * from bar""" 17 | assert_table_lineage_equal( 18 | sql, 19 | {"foo", "bar"}, 20 | {"tab1", "tab2"}, 21 | dialect=dialect, 22 | test_sqlparse=False, 23 | ) 24 | -------------------------------------------------------------------------------- /tests/sql/table/multiple_statements/test_variable.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ....helpers import assert_table_lineage_equal 4 | 5 | 6 | @pytest.mark.parametrize("dialect", ["tsql"]) 7 | def test_tsql_declare(dialect: str): 8 | """ 9 | https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16 10 | """ 11 | sql = """DECLARE @age int = 10; 12 | INSERT INTO tgt 13 | SELECT Name,@age 14 | FROM People;""" 15 | assert_table_lineage_equal(sql, {"People"}, {"tgt"}, dialect=dialect) 16 | -------------------------------------------------------------------------------- /tests/sql/table/test_create.py: -------------------------------------------------------------------------------- 1 | from ...helpers import assert_table_lineage_equal 2 | 3 | 4 | def test_create(): 5 | assert_table_lineage_equal( 6 | "CREATE TABLE tab1 (col1 STRING)", 7 | None, 8 | {"tab1"}, 9 | ) 10 | 11 | 12 | def test_create_if_not_exist(): 13 | assert_table_lineage_equal( 14 | "CREATE TABLE IF NOT EXISTS tab1 (col1 STRING)", 15 | None, 16 | {"tab1"}, 17 | ) 18 | 19 | 20 | def test_create_as(): 21 | assert_table_lineage_equal( 22 | "CREATE TABLE tab1 AS SELECT * FROM tab2", {"tab2"}, {"tab1"} 23 | ) 24 | 25 | 26 | def test_create_like(): 27 | assert_table_lineage_equal("CREATE TABLE tab1 LIKE tab2", {"tab2"}, {"tab1"}) 28 | 29 | 30 | def test_create_view(): 31 | assert_table_lineage_equal( 32 | """CREATE VIEW 
view1 33 | as 34 | SELECT 35 | col1, 36 | col2 37 | FROM tab1 38 | GROUP BY 39 | col1""", 40 | {"tab1"}, 41 | {"view1"}, 42 | ) 43 | 44 | 45 | def test_create_as_with_parenthesis_around_select_statement(): 46 | sql = "CREATE TABLE tab1 AS (SELECT * FROM tab2)" 47 | assert_table_lineage_equal(sql, {"tab2"}, {"tab1"}, test_sqlparse=False) 48 | 49 | 50 | def test_create_as_with_parenthesis_around_both(): 51 | sql = "CREATE TABLE tab1 AS (SELECT * FROM (tab2))" 52 | assert_table_lineage_equal(sql, {"tab2"}, {"tab1"}, test_sqlparse=False) 53 | -------------------------------------------------------------------------------- /tests/sql/table/test_cte_dialect_specific.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ...helpers import assert_table_lineage_equal 4 | 5 | """ 6 | This test class will contain all the tests for testing 'CTE Queries' where the dialect is not ANSI. 7 | """ 8 | 9 | 10 | @pytest.mark.parametrize("dialect", ["databricks", "hive", "sparksql"]) 11 | def test_with_insert_plus_table_keyword(dialect: str): 12 | assert_table_lineage_equal( 13 | "WITH tab1 AS (SELECT * FROM tab2) INSERT INTO TABLE tab3 SELECT * FROM tab1", 14 | {"tab2"}, 15 | {"tab3"}, 16 | dialect=dialect, 17 | ) 18 | 19 | 20 | @pytest.mark.parametrize("dialect", ["databricks", "hive", "sparksql"]) 21 | def test_with_insert_overwrite(dialect: str): 22 | assert_table_lineage_equal( 23 | "WITH tab1 AS (SELECT * FROM tab2) INSERT OVERWRITE TABLE tab3 SELECT * FROM tab1", 24 | {"tab2"}, 25 | {"tab3"}, 26 | dialect=dialect, 27 | ) 28 | 29 | 30 | @pytest.mark.parametrize("dialect", ["databricks", "sparksql"]) 31 | def test_with_insert_overwrite_without_table_keyword(dialect: str): 32 | assert_table_lineage_equal( 33 | "WITH tab1 AS (SELECT * FROM tab2) INSERT OVERWRITE tab3 SELECT * FROM tab1", 34 | {"tab2"}, 35 | {"tab3"}, 36 | dialect=dialect, 37 | ) 38 | 39 | 40 | @pytest.mark.parametrize("dialect", ["databricks", "sparksql"]) 41 | def test_with_select_one_without_as(dialect: str): 42 | # AS in CTE is negligible in SparkSQL, however it is required in most other dialects 43 | # https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-cte.html 44 | # https://dev.mysql.com/doc/refman/8.0/en/with.html 45 | assert_table_lineage_equal( 46 | "WITH wtab1 (SELECT * FROM schema1.tab1) SELECT * FROM wtab1", 47 | {"schema1.tab1"}, 48 | dialect=dialect, 49 | ) 50 | -------------------------------------------------------------------------------- /tests/sql/table/test_insert.py: -------------------------------------------------------------------------------- 1 | from ...helpers import assert_table_lineage_equal 2 | 3 | 4 | def test_insert_into_values(): 5 | assert_table_lineage_equal("INSERT INTO tab1 VALUES (1, 2)", set(), {"tab1"}) 6 | 7 | 8 | def test_insert_into_values_with_subquery(): 9 | assert_table_lineage_equal( 10 | "INSERT INTO tab1 VALUES (1, (SELECT max(id) FROM tab2))", 11 | {"tab2"}, 12 | {"tab1"}, 13 | ) 14 | 15 | 16 | def test_insert_into_values_with_multiple_subquery(): 17 | assert_table_lineage_equal( 18 | "INSERT INTO tab1 VALUES ((SELECT max(id) FROM tab2), (SELECT max(id) FROM tab3))", 19 | {"tab2", "tab3"}, 20 | {"tab1"}, 21 | ) 22 | 23 | 24 | def test_insert_into_values_with_multiple_subquery_in_multiple_row(): 25 | assert_table_lineage_equal( 26 | "INSERT INTO tab1 VALUES (1, (SELECT max(id) FROM tab2)), (2, (SELECT max(id) FROM tab3))", 27 | {"tab2", "tab3"}, 28 | {"tab1"}, 29 | ) 30 | 31 | 32 | def test_insert_into_select(): 33 | 
assert_table_lineage_equal( 34 | "INSERT INTO tab1 SELECT * FROM tab2;", 35 | {"tab2"}, 36 | {"tab1"}, 37 | ) 38 | 39 | 40 | def test_non_reserved_keyword_as_target(): 41 | assert_table_lineage_equal( 42 | "INSERT INTO host SELECT col1, col2 FROM segment", 43 | {"segment"}, 44 | {"host"}, 45 | test_sqlparse=False, 46 | ) 47 | 48 | 49 | def test_insert_into_qualified_table_with_parenthesized_query(): 50 | """ 51 | For sqlparse, it will work if: 52 | 1) table in unqualified 53 | OR 2) query is not surrounded by parenthesis 54 | With both in the game, it breaks. 55 | """ 56 | sql = """INSERT INTO default.tab2 57 | (SELECT * 58 | FROM tab1)""" 59 | assert_table_lineage_equal(sql, {"tab1"}, {"default.tab2"}, test_sqlparse=False) 60 | -------------------------------------------------------------------------------- /tests/sql/table/test_merge.py: -------------------------------------------------------------------------------- 1 | from ...helpers import assert_table_lineage_equal 2 | 3 | 4 | def test_merge_into_using_table(): 5 | sql = """MERGE INTO target 6 | USING src ON target.k = src.k 7 | WHEN MATCHED THEN UPDATE SET target.v = src.v""" 8 | assert_table_lineage_equal(sql, {"src"}, {"target"}) 9 | 10 | 11 | def test_merge_into_using_subquery(): 12 | sql = """MERGE INTO target USING (select k, max(v) as v from src group by k) AS b ON target.k = b.k 13 | WHEN MATCHED THEN UPDATE SET target.v = b.v 14 | WHEN NOT MATCHED THEN INSERT (k, v) VALUES (b.k, b.v)""" 15 | assert_table_lineage_equal(sql, {"src"}, {"target"}) 16 | 17 | 18 | def test_merge_using_cte_subquery(): 19 | sql = """MERGE INTO tgt t 20 | USING ( 21 | WITH base AS ( 22 | SELECT 23 | id, max(value) AS value 24 | FROM src 25 | GROUP BY id 26 | ) 27 | SELECT 28 | id, value 29 | FROM base 30 | ) s 31 | ON t.id = s.id 32 | WHEN MATCHED THEN 33 | UPDATE SET t.value = s.value""" 34 | assert_table_lineage_equal( 35 | sql, 36 | {"src"}, 37 | {"tgt"}, 38 | ) 39 | 40 | 41 | def test_merge_into_insert_one_column(): 42 | sql = """MERGE INTO target 43 | USING src ON target.k = src.k 44 | WHEN NOT MATCHED THEN INSERT VALUES (src.k)""" 45 | assert_table_lineage_equal(sql, {"src"}, {"target"}) 46 | 47 | 48 | def test_merge_with_union_in_subquery_and_join(): 49 | sql = """MERGE INTO tgt t 50 | USING (SELECT s1.id, baz.value 51 | FROM (SELECT id 52 | FROM foo 53 | UNION ALL 54 | SELECT id 55 | FROM bar) s1 56 | CROSS JOIN baz) s 57 | ON t.id = s.id 58 | WHEN MATCHED THEN UPDATE SET t.value = s.value 59 | """ 60 | assert_table_lineage_equal(sql, {"foo", "bar", "baz"}, {"tgt"}) 61 | -------------------------------------------------------------------------------- /tests/sql/table/test_merge_dialect_specific.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ...helpers import assert_table_lineage_equal 4 | 5 | 6 | @pytest.mark.parametrize("dialect", ["bigquery"]) 7 | def test_merge_without_into(dialect: str): 8 | """ 9 | INTO is optional in BigQuery MERGE statement: 10 | https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement 11 | """ 12 | sql = """MERGE target USING src AS b ON target.k = b.k 13 | WHEN MATCHED THEN UPDATE SET target.v = b.v 14 | WHEN NOT MATCHED THEN INSERT (k, v) VALUES (b.k, b.v)""" 15 | assert_table_lineage_equal(sql, {"src"}, {"target"}, dialect=dialect) 16 | 17 | 18 | @pytest.mark.parametrize("dialect", ["bigquery"]) 19 | def test_merge_insert_row(dialect: str): 20 | """ 21 | MERGE INSERT CLAUSE in BigQuery can be INSERT ROW without 
specifying columns via INSERT VALUES (col, ...) 22 | https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement 23 | """ 24 | sql = """MERGE INTO tgt t 25 | USING src s 26 | ON t.date = s.date and t.channel = s.channel 27 | WHEN NOT MATCHED THEN 28 | INSERT ROW 29 | WHEN MATCHED THEN 30 | UPDATE SET t.col = s.col""" 31 | assert_table_lineage_equal( 32 | sql, 33 | {"src"}, 34 | {"tgt"}, 35 | dialect=dialect, 36 | ) 37 | -------------------------------------------------------------------------------- /tests/sql/table/test_other_with_lineage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reata/sqllineage/89e5a614916f2252d61dfacb73d88657f30ab77a/tests/sql/table/test_other_with_lineage.py -------------------------------------------------------------------------------- /tests/sql/table/test_other_with_lineage_dialect_specific.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ...helpers import assert_table_lineage_equal 4 | 5 | 6 | @pytest.mark.parametrize("dialect", ["hive"]) 7 | def test_alter_table_exchange_partition(dialect: str): 8 | """ 9 | See https://cwiki.apache.org/confluence/display/Hive/Exchange+Partition for language manual 10 | """ 11 | assert_table_lineage_equal( 12 | "ALTER TABLE tab1 EXCHANGE PARTITION (pt='part1') WITH TABLE tab2", 13 | {"tab2"}, 14 | {"tab1"}, 15 | dialect=dialect, 16 | ) 17 | 18 | 19 | @pytest.mark.parametrize("dialect", ["snowflake"]) 20 | def test_alter_table_swap_partition(dialect: str): 21 | """ 22 | See https://docs.snowflake.com/en/sql-reference/sql/alter-table for language manual 23 | Note swap is not a keyword in sqlparse, we'll skip testing for it. 
24 | """ 25 | assert_table_lineage_equal( 26 | "ALTER TABLE tab1 SWAP WITH tab2", 27 | {"tab2"}, 28 | {"tab1"}, 29 | dialect=dialect, 30 | test_sqlparse=False, 31 | ) 32 | 33 | 34 | @pytest.mark.parametrize("dialect", ["vertica"]) 35 | def test_swapping_partitions(dialect: str): 36 | """ 37 | See https://www.vertica.com/docs/10.0.x/HTML/Content/Authoring/AdministratorsGuide/Partitions/SwappingPartitions.htm 38 | for language specification 39 | """ 40 | assert_table_lineage_equal( 41 | "SELECT swap_partitions_between_tables('staging', 'min-range-value', 'max-range-value', 'target')", 42 | {"staging"}, 43 | {"target"}, 44 | dialect=dialect, 45 | ) 46 | -------------------------------------------------------------------------------- /tests/sql/table/test_other_without_lineage.py: -------------------------------------------------------------------------------- 1 | from ...helpers import assert_table_lineage_equal 2 | 3 | 4 | def test_use(): 5 | assert_table_lineage_equal("USE db1") 6 | 7 | 8 | def test_drop(): 9 | assert_table_lineage_equal("DROP TABLE IF EXISTS tab1") 10 | 11 | 12 | def test_drop_with_comment(): 13 | assert_table_lineage_equal( 14 | """--comment 15 | DROP TABLE IF EXISTS tab1""" 16 | ) 17 | 18 | 19 | def test_drop_view(): 20 | assert_table_lineage_equal("DROP VIEW IF EXISTS view1") 21 | 22 | 23 | def test_alter_table_rename(): 24 | assert_table_lineage_equal("ALTER TABLE tab1 rename TO tab2") 25 | 26 | 27 | def test_truncate_table(): 28 | assert_table_lineage_equal("TRUNCATE TABLE tab1") 29 | 30 | 31 | def test_delete_from_table(): 32 | assert_table_lineage_equal("DELETE FROM table tab1") 33 | -------------------------------------------------------------------------------- /tests/sql/table/test_update.py: -------------------------------------------------------------------------------- 1 | from ...helpers import assert_table_lineage_equal 2 | 3 | 4 | def test_update(): 5 | assert_table_lineage_equal( 6 | "UPDATE tab1 SET col1='val1' WHERE col2='val2'", None, {"tab1"} 7 | ) 8 | 9 | 10 | def test_update_from(): 11 | assert_table_lineage_equal( 12 | """UPDATE tab2 13 | SET tab2.col2 = tab1.col2 FROM tab1 14 | WHERE tab2.col1 = tab1.col1""", 15 | {"tab1"}, 16 | {"tab2"}, 17 | ) 18 | -------------------------------------------------------------------------------- /tests/sql/table/test_update_dialect_specific.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ...helpers import assert_table_lineage_equal 4 | 5 | 6 | @pytest.mark.parametrize("dialect", ["mysql"]) 7 | def test_update_with_join(dialect: str): 8 | assert_table_lineage_equal( 9 | "UPDATE tab1 a INNER JOIN tab2 b ON a.col1=b.col1 SET a.col2=b.col2", 10 | {"tab2"}, 11 | {"tab1"}, 12 | dialect=dialect, 13 | ) 14 | --------------------------------------------------------------------------------
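
As a quick end-to-end reference, the UPDATE ... FROM case exercised by test_update_from() above can also be run through the public API directly instead of the assert_table_lineage_equal helper. This is a usage sketch rather than part of the test suite; it relies only on LineageRunner and its source_tables()/target_tables() methods, with the dialect passed explicitly.

from sqllineage.runner import LineageRunner

sql = """UPDATE tab2
SET tab2.col2 = tab1.col2 FROM tab1
WHERE tab2.col1 = tab1.col1"""

runner = LineageRunner(sql, dialect="ansi")
# Unqualified table names are reported under the <default> schema placeholder.
print(runner.source_tables())  # tab1 is read from
print(runner.target_tables())  # tab2 is written to
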