├── .github ├── actions │ └── setup │ │ └── action.yml └── workflows │ ├── build_and_test.yml │ ├── codeql_analysis.yml │ └── neo4j_tests.yml ├── .gitignore ├── LICENSE ├── README.md ├── assembly ├── spark-sql-flow-plugin_2.12_spark3.2_1.0.0-SNAPSHOT-with-dependencies.jar └── spark-sql-flow-plugin_2.12_spark3.2_1.0.0-SNAPSHOT.jar ├── bin ├── .startup.py ├── activate-conda-env.sh ├── conda.py ├── conda.sh ├── conda.yml ├── install.sh ├── launch-jupyter-lab.sh ├── lint-python ├── mypy.ini ├── package.sh ├── pycodestyle-2.6.0.py ├── python ├── requirements.txt ├── run-tests ├── run-tests.py ├── spark-shell └── tox.ini ├── build └── mvn ├── md ├── pom.xml ├── python ├── sqlflow.py └── tests │ ├── test_neo4jaura_sink.py │ ├── test_sqlflow.py │ └── testutils.py ├── resources ├── README.md ├── graphviz_1.svg ├── graphviz_2.svg ├── graphviz_3.svg ├── neo4j_aura_1.svg ├── networkx_example.ipynb ├── spark-data-repair-plugin-graphviz.svg ├── spark-data-repair-plugin-neo4jaura.svg └── tpcds-q10-neo4jaura.svg ├── scalastyle-config.xml └── src ├── main └── scala │ └── org │ └── apache │ └── spark │ ├── api │ └── python │ │ └── SQLFlowApi.scala │ └── sql │ └── flow │ ├── SQLFlow.scala │ ├── interfaces.scala │ ├── listeners.scala │ └── sink │ ├── MermaidSink.scala │ ├── Neo4jAuraSink.scala │ ├── SinkUtils.scala │ └── basicSinks.scala └── test ├── resources ├── log4j.properties ├── sql-flow-tests │ ├── inputs │ │ ├── basics.sql │ │ ├── exists-subquery-aggregate.sql │ │ ├── exists-subquery.sql │ │ ├── group-analytics.sql │ │ ├── group-by-filter.sql │ │ ├── group-by.sql │ │ ├── in-subquery-aggregate.sql │ │ ├── in-subquery.sql │ │ ├── joins.sql │ │ ├── scalar-subquery-predicate.sql │ │ ├── scalar-subquery.sql │ │ ├── set-operations.sql │ │ ├── subquery-in-from.sql │ │ └── window.sql │ └── results │ │ ├── basics-contracted.sql.dot │ │ ├── basics-contracted.sql.svg │ │ ├── basics.sql.dot │ │ ├── basics.sql.svg │ │ ├── exists-subquery-aggregate-contracted.sql.dot │ │ ├── 
exists-subquery-aggregate-contracted.sql.svg │ │ ├── exists-subquery-aggregate.sql.dot │ │ ├── exists-subquery-aggregate.sql.svg │ │ ├── exists-subquery-contracted.sql.dot │ │ ├── exists-subquery-contracted.sql.svg │ │ ├── exists-subquery.sql.dot │ │ ├── exists-subquery.sql.svg │ │ ├── group-analytics-contracted.sql.dot │ │ ├── group-analytics-contracted.sql.svg │ │ ├── group-analytics.sql.dot │ │ ├── group-analytics.sql.svg │ │ ├── group-by-contracted.sql.dot │ │ ├── group-by-contracted.sql.svg │ │ ├── group-by-filter-contracted.sql.dot │ │ ├── group-by-filter.sql.dot │ │ ├── group-by.sql.dot │ │ ├── group-by.sql.svg │ │ ├── in-subquery-aggregate-contracted.sql.dot │ │ ├── in-subquery-aggregate-contracted.sql.svg │ │ ├── in-subquery-aggregate.sql.dot │ │ ├── in-subquery-aggregate.sql.svg │ │ ├── in-subquery-contracted.sql.dot │ │ ├── in-subquery-contracted.sql.svg │ │ ├── in-subquery.sql.dot │ │ ├── in-subquery.sql.svg │ │ ├── joins-contracted.sql.dot │ │ ├── joins-contracted.sql.svg │ │ ├── joins.sql.dot │ │ ├── joins.sql.svg │ │ ├── scalar-subquery-contracted.sql.dot │ │ ├── scalar-subquery-contracted.sql.svg │ │ ├── scalar-subquery-predicate-contracted.sql.dot │ │ ├── scalar-subquery-predicate-contracted.sql.svg │ │ ├── scalar-subquery-predicate.sql.dot │ │ ├── scalar-subquery-predicate.sql.svg │ │ ├── scalar-subquery.sql.dot │ │ ├── scalar-subquery.sql.svg │ │ ├── set-operations-contracted.sql.dot │ │ ├── set-operations-contracted.sql.svg │ │ ├── set-operations.sql.dot │ │ ├── set-operations.sql.svg │ │ ├── subquery-in-from-contracted.sql.dot │ │ ├── subquery-in-from-contracted.sql.svg │ │ ├── subquery-in-from.sql.dot │ │ ├── subquery-in-from.sql.svg │ │ ├── window-contracted.sql.dot │ │ ├── window-contracted.sql.svg │ │ ├── window.sql.dot │ │ └── window.sql.svg └── tpcds-flow-tests │ ├── inputs │ ├── q1.sql │ ├── q10.sql │ ├── q11.sql │ ├── q12.sql │ ├── q13.sql │ ├── q14a.sql │ ├── q14b.sql │ ├── q15.sql │ ├── q16.sql │ ├── q17.sql │ ├── q18.sql │ ├── 
q19.sql │ ├── q2.sql │ ├── q20.sql │ ├── q21.sql │ ├── q22.sql │ ├── q23a.sql │ ├── q23b.sql │ ├── q24a.sql │ ├── q24b.sql │ ├── q25.sql │ ├── q26.sql │ ├── q27.sql │ ├── q28.sql │ ├── q29.sql │ ├── q3.sql │ ├── q30.sql │ ├── q31.sql │ ├── q32.sql │ ├── q33.sql │ ├── q34.sql │ ├── q35.sql │ ├── q36.sql │ ├── q37.sql │ ├── q38.sql │ ├── q39a.sql │ ├── q39b.sql │ ├── q4.sql │ ├── q40.sql │ ├── q41.sql │ ├── q42.sql │ ├── q43.sql │ ├── q44.sql │ ├── q45.sql │ ├── q46.sql │ ├── q47.sql │ ├── q48.sql │ ├── q49.sql │ ├── q5.sql │ ├── q50.sql │ ├── q51.sql │ ├── q52.sql │ ├── q53.sql │ ├── q54.sql │ ├── q55.sql │ ├── q56.sql │ ├── q57.sql │ ├── q58.sql │ ├── q59.sql │ ├── q6.sql │ ├── q60.sql │ ├── q61.sql │ ├── q62.sql │ ├── q63.sql │ ├── q64.sql │ ├── q65.sql │ ├── q66.sql │ ├── q67.sql │ ├── q68.sql │ ├── q69.sql │ ├── q7.sql │ ├── q70.sql │ ├── q71.sql │ ├── q72.sql │ ├── q73.sql │ ├── q74.sql │ ├── q75.sql │ ├── q76.sql │ ├── q77.sql │ ├── q78.sql │ ├── q79.sql │ ├── q8.sql │ ├── q80.sql │ ├── q81.sql │ ├── q82.sql │ ├── q83.sql │ ├── q84.sql │ ├── q85.sql │ ├── q86.sql │ ├── q87.sql │ ├── q88.sql │ ├── q89.sql │ ├── q9.sql │ ├── q90.sql │ ├── q91.sql │ ├── q92.sql │ ├── q93.sql │ ├── q94.sql │ ├── q95.sql │ ├── q96.sql │ ├── q97.sql │ ├── q98.sql │ └── q99.sql │ └── results │ ├── q1-contracted.sql.dot │ ├── q1-contracted.sql.svg │ ├── q1.sql.dot │ ├── q1.sql.svg │ ├── q10-contracted.sql.dot │ ├── q10-contracted.sql.svg │ ├── q10.sql.dot │ ├── q10.sql.svg │ ├── q11-contracted.sql.dot │ ├── q11-contracted.sql.svg │ ├── q11.sql.dot │ ├── q11.sql.svg │ ├── q12-contracted.sql.dot │ ├── q12-contracted.sql.svg │ ├── q12.sql.dot │ ├── q12.sql.svg │ ├── q13-contracted.sql.dot │ ├── q13-contracted.sql.svg │ ├── q13.sql.dot │ ├── q13.sql.svg │ ├── q14a-contracted.sql.dot │ ├── q14a-contracted.sql.svg │ ├── q14a.sql.dot │ ├── q14a.sql.svg │ ├── q14b-contracted.sql.dot │ ├── q14b-contracted.sql.svg │ ├── q14b.sql.dot │ ├── q14b.sql.svg │ ├── q15-contracted.sql.dot │ ├── 
q15-contracted.sql.svg │ ├── q15.sql.dot │ ├── q15.sql.svg │ ├── q16-contracted.sql.dot │ ├── q16-contracted.sql.svg │ ├── q16.sql.dot │ ├── q16.sql.svg │ ├── q17-contracted.sql.dot │ ├── q17-contracted.sql.svg │ ├── q17.sql.dot │ ├── q17.sql.svg │ ├── q18-contracted.sql.dot │ ├── q18-contracted.sql.svg │ ├── q18.sql.dot │ ├── q18.sql.svg │ ├── q19-contracted.sql.dot │ ├── q19-contracted.sql.svg │ ├── q19.sql.dot │ ├── q19.sql.svg │ ├── q2-contracted.sql.dot │ ├── q2-contracted.sql.svg │ ├── q2.sql.dot │ ├── q2.sql.svg │ ├── q20-contracted.sql.dot │ ├── q20-contracted.sql.svg │ ├── q20.sql.dot │ ├── q20.sql.svg │ ├── q21-contracted.sql.dot │ ├── q21-contracted.sql.svg │ ├── q21.sql.dot │ ├── q21.sql.svg │ ├── q22-contracted.sql.dot │ ├── q22-contracted.sql.svg │ ├── q22.sql.dot │ ├── q22.sql.svg │ ├── q23a-contracted.sql.dot │ ├── q23a-contracted.sql.svg │ ├── q23a.sql.dot │ ├── q23a.sql.svg │ ├── q23b-contracted.sql.dot │ ├── q23b-contracted.sql.svg │ ├── q23b.sql.dot │ ├── q23b.sql.svg │ ├── q24a-contracted.sql.dot │ ├── q24a-contracted.sql.svg │ ├── q24a.sql.dot │ ├── q24a.sql.svg │ ├── q24b-contracted.sql.dot │ ├── q24b-contracted.sql.svg │ ├── q24b.sql.dot │ ├── q24b.sql.svg │ ├── q25-contracted.sql.dot │ ├── q25-contracted.sql.svg │ ├── q25.sql.dot │ ├── q25.sql.svg │ ├── q26-contracted.sql.dot │ ├── q26-contracted.sql.svg │ ├── q26.sql.dot │ ├── q26.sql.svg │ ├── q27-contracted.sql.dot │ ├── q27-contracted.sql.svg │ ├── q27.sql.dot │ ├── q27.sql.svg │ ├── q29-contracted.sql.dot │ ├── q29-contracted.sql.svg │ ├── q29.sql.dot │ ├── q29.sql.svg │ ├── q3-contracted.sql.dot │ ├── q3-contracted.sql.svg │ ├── q3.sql.dot │ ├── q3.sql.svg │ ├── q30-contracted.sql.dot │ ├── q30-contracted.sql.svg │ ├── q30.sql.dot │ ├── q30.sql.svg │ ├── q31-contracted.sql.dot │ ├── q31-contracted.sql.svg │ ├── q31.sql.dot │ ├── q31.sql.svg │ ├── q32-contracted.sql.dot │ ├── q32-contracted.sql.svg │ ├── q32.sql.dot │ ├── q32.sql.svg │ ├── q33-contracted.sql.dot │ ├── 
q33-contracted.sql.svg │ ├── q33.sql.dot │ ├── q33.sql.svg │ ├── q34-contracted.sql.dot │ ├── q34-contracted.sql.svg │ ├── q34.sql.dot │ ├── q34.sql.svg │ ├── q35-contracted.sql.dot │ ├── q35-contracted.sql.svg │ ├── q35.sql.dot │ ├── q35.sql.svg │ ├── q36-contracted.sql.dot │ ├── q36-contracted.sql.svg │ ├── q36.sql.dot │ ├── q36.sql.svg │ ├── q37-contracted.sql.dot │ ├── q37-contracted.sql.svg │ ├── q37.sql.dot │ ├── q37.sql.svg │ ├── q38-contracted.sql.dot │ ├── q38-contracted.sql.svg │ ├── q38.sql.dot │ ├── q38.sql.svg │ ├── q39a-contracted.sql.dot │ ├── q39a-contracted.sql.svg │ ├── q39a.sql.dot │ ├── q39a.sql.svg │ ├── q39b-contracted.sql.dot │ ├── q39b-contracted.sql.svg │ ├── q39b.sql.dot │ ├── q39b.sql.svg │ ├── q4-contracted.sql.dot │ ├── q4-contracted.sql.svg │ ├── q4.sql.dot │ ├── q4.sql.svg │ ├── q40-contracted.sql.dot │ ├── q40-contracted.sql.svg │ ├── q40.sql.dot │ ├── q40.sql.svg │ ├── q41-contracted.sql.dot │ ├── q41-contracted.sql.svg │ ├── q41.sql.dot │ ├── q41.sql.svg │ ├── q42-contracted.sql.dot │ ├── q42-contracted.sql.svg │ ├── q42.sql.dot │ ├── q42.sql.svg │ ├── q43-contracted.sql.dot │ ├── q43-contracted.sql.svg │ ├── q43.sql.dot │ ├── q43.sql.svg │ ├── q44-contracted.sql.dot │ ├── q44-contracted.sql.svg │ ├── q44.sql.dot │ ├── q44.sql.svg │ ├── q45-contracted.sql.dot │ ├── q45-contracted.sql.svg │ ├── q45.sql.dot │ ├── q45.sql.svg │ ├── q46-contracted.sql.dot │ ├── q46-contracted.sql.svg │ ├── q46.sql.dot │ ├── q46.sql.svg │ ├── q47-contracted.sql.dot │ ├── q47-contracted.sql.svg │ ├── q47.sql.dot │ ├── q47.sql.svg │ ├── q48-contracted.sql.dot │ ├── q48-contracted.sql.svg │ ├── q48.sql.dot │ ├── q48.sql.svg │ ├── q49-contracted.sql.dot │ ├── q49-contracted.sql.svg │ ├── q49.sql.dot │ ├── q49.sql.svg │ ├── q5-contracted.sql.dot │ ├── q5-contracted.sql.svg │ ├── q5.sql.dot │ ├── q5.sql.svg │ ├── q50-contracted.sql.dot │ ├── q50-contracted.sql.svg │ ├── q50.sql.dot │ ├── q50.sql.svg │ ├── q51-contracted.sql.dot │ ├── q51-contracted.sql.svg │ 
├── q51.sql.dot │ ├── q51.sql.svg │ ├── q52-contracted.sql.dot │ ├── q52-contracted.sql.svg │ ├── q52.sql.dot │ ├── q52.sql.svg │ ├── q53-contracted.sql.dot │ ├── q53-contracted.sql.svg │ ├── q53.sql.dot │ ├── q53.sql.svg │ ├── q54-contracted.sql.dot │ ├── q54-contracted.sql.svg │ ├── q54.sql.dot │ ├── q54.sql.svg │ ├── q55-contracted.sql.dot │ ├── q55-contracted.sql.svg │ ├── q55.sql.dot │ ├── q55.sql.svg │ ├── q56-contracted.sql.dot │ ├── q56-contracted.sql.svg │ ├── q56.sql.dot │ ├── q56.sql.svg │ ├── q57-contracted.sql.dot │ ├── q57-contracted.sql.svg │ ├── q57.sql.dot │ ├── q57.sql.svg │ ├── q58-contracted.sql.dot │ ├── q58-contracted.sql.svg │ ├── q58.sql.dot │ ├── q58.sql.svg │ ├── q59-contracted.sql.dot │ ├── q59-contracted.sql.svg │ ├── q59.sql.dot │ ├── q59.sql.svg │ ├── q6-contracted.sql.dot │ ├── q6-contracted.sql.svg │ ├── q6.sql.dot │ ├── q6.sql.svg │ ├── q60-contracted.sql.dot │ ├── q60-contracted.sql.svg │ ├── q60.sql.dot │ ├── q60.sql.svg │ ├── q61-contracted.sql.dot │ ├── q61-contracted.sql.svg │ ├── q61.sql.dot │ ├── q61.sql.svg │ ├── q62-contracted.sql.dot │ ├── q62-contracted.sql.svg │ ├── q62.sql.dot │ ├── q62.sql.svg │ ├── q63-contracted.sql.dot │ ├── q63-contracted.sql.svg │ ├── q63.sql.dot │ ├── q63.sql.svg │ ├── q64-contracted.sql.dot │ ├── q64-contracted.sql.svg │ ├── q64.sql.dot │ ├── q64.sql.svg │ ├── q65-contracted.sql.dot │ ├── q65-contracted.sql.svg │ ├── q65.sql.dot │ ├── q65.sql.svg │ ├── q66-contracted.sql.dot │ ├── q66-contracted.sql.svg │ ├── q66.sql.dot │ ├── q66.sql.svg │ ├── q67-contracted.sql.dot │ ├── q67-contracted.sql.svg │ ├── q67.sql.dot │ ├── q67.sql.svg │ ├── q68-contracted.sql.dot │ ├── q68-contracted.sql.svg │ ├── q68.sql.dot │ ├── q68.sql.svg │ ├── q69-contracted.sql.dot │ ├── q69-contracted.sql.svg │ ├── q69.sql.dot │ ├── q69.sql.svg │ ├── q7-contracted.sql.dot │ ├── q7-contracted.sql.svg │ ├── q7.sql.dot │ ├── q7.sql.svg │ ├── q70-contracted.sql.dot │ ├── q70-contracted.sql.svg │ ├── q70.sql.dot │ ├── q70.sql.svg 
│ ├── q71-contracted.sql.dot │ ├── q71-contracted.sql.svg │ ├── q71.sql.dot │ ├── q71.sql.svg │ ├── q72-contracted.sql.dot │ ├── q72-contracted.sql.svg │ ├── q72.sql.dot │ ├── q72.sql.svg │ ├── q73-contracted.sql.dot │ ├── q73-contracted.sql.svg │ ├── q73.sql.dot │ ├── q73.sql.svg │ ├── q74-contracted.sql.dot │ ├── q74-contracted.sql.svg │ ├── q74.sql.dot │ ├── q74.sql.svg │ ├── q75-contracted.sql.dot │ ├── q75-contracted.sql.svg │ ├── q75.sql.dot │ ├── q75.sql.svg │ ├── q76-contracted.sql.dot │ ├── q76-contracted.sql.svg │ ├── q76.sql.dot │ ├── q76.sql.svg │ ├── q77-contracted.sql.dot │ ├── q77-contracted.sql.svg │ ├── q77.sql.dot │ ├── q77.sql.svg │ ├── q78-contracted.sql.dot │ ├── q78-contracted.sql.svg │ ├── q78.sql.dot │ ├── q78.sql.svg │ ├── q79-contracted.sql.dot │ ├── q79-contracted.sql.svg │ ├── q79.sql.dot │ ├── q79.sql.svg │ ├── q8-contracted.sql.dot │ ├── q8-contracted.sql.svg │ ├── q8.sql.dot │ ├── q8.sql.svg │ ├── q80-contracted.sql.dot │ ├── q80-contracted.sql.svg │ ├── q80.sql.dot │ ├── q80.sql.svg │ ├── q81-contracted.sql.dot │ ├── q81-contracted.sql.svg │ ├── q81.sql.dot │ ├── q81.sql.svg │ ├── q82-contracted.sql.dot │ ├── q82-contracted.sql.svg │ ├── q82.sql.dot │ ├── q82.sql.svg │ ├── q83-contracted.sql.dot │ ├── q83-contracted.sql.svg │ ├── q83.sql.dot │ ├── q83.sql.svg │ ├── q84-contracted.sql.dot │ ├── q84-contracted.sql.svg │ ├── q84.sql.dot │ ├── q84.sql.svg │ ├── q85-contracted.sql.dot │ ├── q85-contracted.sql.svg │ ├── q85.sql.dot │ ├── q85.sql.svg │ ├── q86-contracted.sql.dot │ ├── q86-contracted.sql.svg │ ├── q86.sql.dot │ ├── q86.sql.svg │ ├── q87-contracted.sql.dot │ ├── q87-contracted.sql.svg │ ├── q87.sql.dot │ ├── q87.sql.svg │ ├── q88-contracted.sql.dot │ ├── q88-contracted.sql.svg │ ├── q88.sql.dot │ ├── q88.sql.svg │ ├── q89-contracted.sql.dot │ ├── q89-contracted.sql.svg │ ├── q89.sql.dot │ ├── q89.sql.svg │ ├── q9-contracted.sql.dot │ ├── q9-contracted.sql.svg │ ├── q9.sql.dot │ ├── q9.sql.svg │ ├── q90-contracted.sql.dot │ 
├── q90-contracted.sql.svg │ ├── q90.sql.dot │ ├── q90.sql.svg │ ├── q91-contracted.sql.dot │ ├── q91-contracted.sql.svg │ ├── q91.sql.dot │ ├── q91.sql.svg │ ├── q92-contracted.sql.dot │ ├── q92-contracted.sql.svg │ ├── q92.sql.dot │ ├── q92.sql.svg │ ├── q93-contracted.sql.dot │ ├── q93-contracted.sql.svg │ ├── q93.sql.dot │ ├── q93.sql.svg │ ├── q94-contracted.sql.dot │ ├── q94-contracted.sql.svg │ ├── q94.sql.dot │ ├── q94.sql.svg │ ├── q95-contracted.sql.dot │ ├── q95-contracted.sql.svg │ ├── q95.sql.dot │ ├── q95.sql.svg │ ├── q96-contracted.sql.dot │ ├── q96-contracted.sql.svg │ ├── q96.sql.dot │ ├── q96.sql.svg │ ├── q97-contracted.sql.dot │ ├── q97-contracted.sql.svg │ ├── q97.sql.dot │ ├── q97.sql.svg │ ├── q98-contracted.sql.dot │ ├── q98-contracted.sql.svg │ ├── q98.sql.dot │ ├── q98.sql.svg │ ├── q99-contracted.sql.dot │ ├── q99-contracted.sql.svg │ ├── q99.sql.dot │ └── q99.sql.svg └── scala └── org └── apache └── spark └── sql └── flow ├── Neo4jAuraTest.scala ├── SQLFlowSuite.scala ├── SQLFlowTestSuite.scala ├── SQLFlowTestUtils.scala ├── TPCDSFlowTestSuite.scala ├── TPCDSFlowWithNeo4jAuraSink.scala ├── TPCDSSchema.scala ├── TPCDSTest.scala └── sink ├── GraphSinkSuite.scala └── Neo4jAuraSinkSuite.scala /.github/actions/setup/action.yml: -------------------------------------------------------------------------------- 1 | inputs: 2 | java: 3 | required: true 4 | type: string 5 | python: 6 | required: true 7 | type: string 8 | 9 | runs: 10 | using: "composite" 11 | steps: 12 | # Cache local repositories. Note that GitHub Actions cache has a 2G limit. 
13 | - name: Cache Scala, Maven and Zinc 14 | uses: actions/cache@v1 15 | with: 16 | path: build 17 | key: build-${{ hashFiles('**/pom.xml') }} 18 | restore-keys: | 19 | build- 20 | - name: Cache Maven local repository 21 | uses: actions/cache@v2 22 | with: 23 | path: ~/.m2/repository 24 | key: ${{ inputs.java }}-maven-${{ hashFiles('**/pom.xml') }} 25 | restore-keys: | 26 | ${{ inputs.java }}-maven- 27 | - name: Install JDK ${{ inputs.java }} 28 | uses: actions/setup-java@v1 29 | with: 30 | java-version: ${{ inputs.java }} 31 | - name: Install Python ${{ inputs.python }} 32 | uses: actions/setup-python@v2 33 | with: 34 | python-version: ${{ inputs.python }} 35 | architecture: x64 36 | - name: Install Python packages (Python ${{ inputs.python }}) 37 | shell: bash 38 | run: python -m pip install -r ./bin/requirements.txt 39 | - name: Install Graphviz 40 | shell: bash 41 | run: | 42 | sudo apt-get update -y 43 | sudo apt-get install -y graphviz 44 | - name: Install mermaid-cli 45 | shell: bash 46 | run: | 47 | npm install @mermaid-js/mermaid-cli 48 | echo "PATH=$PATH:`pwd`/node_modules/.bin" >> $GITHUB_ENV 49 | -------------------------------------------------------------------------------- /.github/workflows/build_and_test.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: 4 | push: 5 | branches: 6 | - spark-3.2 7 | 8 | jobs: 9 | tests: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | java: [1.8, 11] 15 | python: [3.7, 3.8] 16 | env: 17 | SPARK_LOCAL_IP: localhost 18 | CONDA_DISABLED: 1 19 | MAVEN_OPTS: -Xmx1g 20 | steps: 21 | - name: Checkout spark-sql-flow-plugin repository 22 | uses: actions/checkout@v2 23 | # In order to fetch changed files 24 | with: 25 | fetch-depth: 0 26 | - name: Init test env 27 | uses: ./.github/actions/setup 28 | with: 29 | java: ${{ matrix.java }} 30 | python: ${{ matrix.python }} 31 | - name: Run tests 32 | run: | 33 | ./build/mvn 
-q clean package -DskipTests 34 | ./build/mvn -q test 35 | ./bin/run-tests --parallelism 1 36 | - name: Upload test results to report 37 | if: always() 38 | uses: actions/upload-artifact@v2 39 | with: 40 | name: test-results-jdk${{ matrix.java }}-python${{ matrix.python}} 41 | path: "./target/surefire-reports/*.xml" 42 | - name: Upload unit tests log files 43 | if: failure() 44 | uses: actions/upload-artifact@v2 45 | with: 46 | name: unit-tests-log-jdk${{ matrix.java }}-python${{ matrix.python}} 47 | path: "./target/unit-tests.log" 48 | 49 | neo4j-tests-java8-python37: 50 | uses: maropu/spark-sql-flow-plugin/.github/workflows/neo4j_tests.yml@spark-3.2 51 | with: 52 | java: 1.8 53 | python: 3.7 54 | secrets: 55 | uri: ${{ secrets.NEO4J_AURADB_URI}} 56 | user: ${{ secrets.NEO4J_AURADB_USER}} 57 | passwd: ${{ secrets.NEO4J_AURADB_PASSWD}} 58 | 59 | neo4j-tests-java8-python38: 60 | # Since the neo4j integration tests will access the same Neo4j Aura instance, 61 | # they cannot run concurrently. 
62 | needs: neo4j-tests-java8-python37 63 | uses: maropu/spark-sql-flow-plugin/.github/workflows/neo4j_tests.yml@spark-3.2 64 | with: 65 | java: 1.8 66 | python: 3.8 67 | secrets: 68 | uri: ${{ secrets.NEO4J_AURADB_URI}} 69 | user: ${{ secrets.NEO4J_AURADB_USER}} 70 | passwd: ${{ secrets.NEO4J_AURADB_PASSWD}} 71 | 72 | neo4j-tests-java11-python37: 73 | needs: neo4j-tests-java8-python38 74 | uses: maropu/spark-sql-flow-plugin/.github/workflows/neo4j_tests.yml@spark-3.2 75 | with: 76 | java: 11 77 | python: 3.7 78 | secrets: 79 | uri: ${{ secrets.NEO4J_AURADB_URI}} 80 | user: ${{ secrets.NEO4J_AURADB_USER}} 81 | passwd: ${{ secrets.NEO4J_AURADB_PASSWD}} 82 | 83 | neo4j-tests-java11-python38: 84 | needs: neo4j-tests-java11-python37 85 | uses: maropu/spark-sql-flow-plugin/.github/workflows/neo4j_tests.yml@spark-3.2 86 | with: 87 | java: 11 88 | python: 3.8 89 | secrets: 90 | uri: ${{ secrets.NEO4J_AURADB_URI}} 91 | user: ${{ secrets.NEO4J_AURADB_USER}} 92 | passwd: ${{ secrets.NEO4J_AURADB_PASSWD}} 93 | -------------------------------------------------------------------------------- /.github/workflows/codeql_analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ spark-3.2 ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ spark-3.2 ] 20 | schedule: 21 | - cron: '41 16 * * 0' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v2 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v1 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 54 | # If this step fails, then you should remove it and run the build manually (see below) 55 | - name: Autobuild 56 | uses: github/codeql-action/autobuild@v1 57 | 58 | # ℹ️ Command-line programs to run using the OS shell. 
59 | # 📚 https://git.io/JvXDl 60 | 61 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 62 | # and modify them (or add more) to build your code if your project 63 | # uses a compiled language 64 | 65 | #- run: | 66 | # make bootstrap 67 | # make release 68 | 69 | - name: Perform CodeQL Analysis 70 | uses: github/codeql-action/analyze@v1 71 | -------------------------------------------------------------------------------- /.github/workflows/neo4j_tests.yml: -------------------------------------------------------------------------------- 1 | name: Reusable workflow for Neo4j tests 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | java: 7 | required: true 8 | type: string 9 | python: 10 | required: true 11 | type: string 12 | secrets: 13 | uri: 14 | required: true 15 | user: 16 | required: true 17 | passwd: 18 | required: true 19 | 20 | jobs: 21 | neo4j-tests: 22 | name: Run Neo4j tests 23 | runs-on: ubuntu-latest 24 | env: 25 | SPARK_LOCAL_IP: localhost 26 | MAVEN_OPTS: -Xmx1g 27 | steps: 28 | - name: Checkout spark-sql-flow-plugin repository 29 | uses: actions/checkout@v2 30 | # In order to fetch changed files 31 | with: 32 | fetch-depth: 0 33 | - name: Init test env 34 | uses: ./.github/actions/setup 35 | with: 36 | java: ${{ inputs.java }} 37 | python: ${{ inputs.python}} 38 | - name: Run tests 39 | env: 40 | NEO4J_AURADB_URI: ${{ secrets.uri}} 41 | NEO4J_AURADB_USER: ${{ secrets.user}} 42 | NEO4J_AURADB_PASSWD: ${{ secrets.passwd}} 43 | run: | 44 | ./build/mvn -q clean package -DskipTests 45 | ./build/mvn -q -Dtest=none -DwildcardSuites=org.apache.spark.sql.flow.sink.Neo4jAuraSinkSuite test 46 | ./build/mvn -q -Dtest=none -DwildcardSuites=org.apache.spark.sql.flow.TPCDSFlowWithNeo4jAuraSink test 47 | ./bin/run-tests --parallelism 1 --testnames test_neo4jaura_sink 48 | - name: Upload test results to report 49 | if: always() 50 | uses: actions/upload-artifact@v2 51 | with: 52 | name: neo4j-test-results-jdk${{ inputs.java 
}}-python${{ inputs.python}} 53 | path: "./target/surefire-reports/*.xml" 54 | - name: Upload unit tests log files 55 | if: failure() 56 | uses: actions/upload-artifact@v2 57 | with: 58 | name: neo4j-unit-tests-log-jdk${{ inputs.java }}-python${{ inputs.python}} 59 | path: "./target/unit-tests.log" 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *#*# 2 | *.#* 3 | *.iml 4 | *.ipr 5 | *.iws 6 | *.pyc 7 | *.pyo 8 | *.swp 9 | *~ 10 | .DS_Store 11 | .cache 12 | .classpath 13 | .ensime 14 | .ensime_cache/ 15 | .ensime_lucene 16 | .generated-mima* 17 | .idea/ 18 | .idea_modules/ 19 | .project 20 | .pydevproject 21 | .scala_dependencies 22 | .settings 23 | /lib/ 24 | R-unit-tests.log 25 | R/unit-tests.out 26 | R/cran-check.out 27 | R/pkg/vignettes/sparkr-vignettes.html 28 | R/pkg/tests/fulltests/Rplots.pdf 29 | build/*.jar 30 | build/apache-maven* 31 | build/scala* 32 | build/zinc* 33 | cache 34 | checkpoint 35 | conf/*.cmd 36 | conf/*.conf 37 | conf/*.properties 38 | conf/*.sh 39 | conf/*.xml 40 | conf/java-opts 41 | conf/slaves 42 | dependency-reduced-pom.xml 43 | derby.log 44 | dev/create-release/*final 45 | dev/create-release/*txt 46 | dev/pr-deps/ 47 | dist/ 48 | docs/_site 49 | docs/api 50 | lib_managed/ 51 | lint-r-report.log 52 | log/ 53 | logs/ 54 | out/ 55 | project/boot/ 56 | project/build/target/ 57 | project/plugins/lib_managed/ 58 | project/plugins/project/build.properties 59 | project/plugins/src_managed/ 60 | project/plugins/target/ 61 | python/lib/pyspark.zip 62 | python/deps 63 | python/pyspark/python 64 | scalastyle-on-compile.generated.xml 65 | scalastyle-output.xml 66 | scalastyle.txt 67 | spark-*-bin-*.tgz 68 | spark-tests.log 69 | src_managed/ 70 | streaming-tests.log 71 | target/ 72 | unit-tests.log 73 | work/ 74 | bin/spark-*-bin-hadoop* 75 | 76 | # For Hive 77 | TempStatsStore/ 78 | metastore/ 79 | metastore_db/ 
80 | sql/hive-thriftserver/test_warehouses 81 | warehouse/ 82 | spark-warehouse/ 83 | 84 | # For R session data 85 | .RData 86 | .RHistory 87 | .Rhistory 88 | *.Rproj 89 | *.Rproj.* 90 | 91 | .Rproj.user 92 | 93 | # For additional settings 94 | bin/spark-master 95 | .ipynb_checkpoints 96 | -------------------------------------------------------------------------------- /assembly/spark-sql-flow-plugin_2.12_spark3.2_1.0.0-SNAPSHOT-with-dependencies.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maropu/spark-sql-flow-plugin/57d9db75d544706f1693d9a06a9202fc96e64b3e/assembly/spark-sql-flow-plugin_2.12_spark3.2_1.0.0-SNAPSHOT-with-dependencies.jar -------------------------------------------------------------------------------- /assembly/spark-sql-flow-plugin_2.12_spark3.2_1.0.0-SNAPSHOT.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maropu/spark-sql-flow-plugin/57d9db75d544706f1693d9a06a9202fc96e64b3e/assembly/spark-sql-flow-plugin_2.12_spark3.2_1.0.0-SNAPSHOT.jar -------------------------------------------------------------------------------- /bin/.startup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | from sqlflow import * 21 | -------------------------------------------------------------------------------- /bin/activate-conda-env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # 21 | # Activate conda env based on bin/conda.yml 22 | 23 | FWDIR="$(cd "`dirname $BASH_SOURCE`"/..; pwd)" 24 | 25 | # Activate a conda virtual env 26 | . 
# Creates (if needed) and activates the conda virtual env used by this project.
#
# Arguments:
#   $1 - project root directory (the directory that holds pom.xml and bin/)
#
# Globals set (intentionally left visible to the sourcing shell):
#   CONDA_COMMAND, CONDA_ENV_ID, CONDA_ENV_HOME, ACTIVATE_CONDA_ENV
#
# All path expansions are quoted so that a checkout located under a directory
# whose name contains whitespace does not get word-split.
activate_conda_virtual_env() {
  local _ROOTT_DIR="$1"

  # Loads some variables from pom.xml
  . "${_ROOTT_DIR}/bin/package.sh" "${_ROOTT_DIR}"

  # Creates a virtual env to resolve Python dependencies
  CONDA_COMMAND="${_ROOTT_DIR}/bin/conda.py"
  # NOTE(review): the grep pattern below is empty, so it matches every line and
  # only the first line of pom.xml is ever parsed. It looks like an XML tag name
  # was lost from the pattern -- TODO confirm against the repository.
  CONDA_ENV_ID=$(grep "" "${_ROOTT_DIR}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}')
  "${CONDA_COMMAND}" --command=create_env_only --env_id="${CONDA_ENV_ID}"
  echo -ne "\n=== Launching conda virtual env '$("${CONDA_COMMAND}" --command=get_env_name --env_id="${CONDA_ENV_ID}")' ===\n"

  # Gets virtual env home
  CONDA_ENV_HOME=$("${CONDA_COMMAND}" --command=get_env_home --env_id="${CONDA_ENV_ID}")

  # Then, activates the virtual env. The helper prints a shell command line,
  # which has to be eval'ed in the current shell for the activation to stick.
  ACTIVATE_CONDA_ENV=$("${CONDA_COMMAND}" --command=get_activate_command --env_id="${CONDA_ENV_ID}")
  eval "${ACTIVATE_CONDA_ENV}"
}
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # Determines the directory that contains this script 21 | _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 22 | 23 | # Installs any application tarball given a URL, the expected tarball name, 24 | # and, optionally, a checkable binary path to determine if the binary has 25 | # already been installed 26 | ## Arg1 - URL 27 | ## Arg2 - Tarball Name 28 | ## Arg3 - Checkable Binary 29 | install_app() { 30 | local remote_archive="$1/$2" 31 | local local_archive="${_DIR}/$2" 32 | local binary="${_DIR}/$3" 33 | 34 | if [ -z "$3" -o ! -f "$binary" ]; then 35 | download_app "${remote_archive}" "${local_archive}" 36 | 37 | case "$local_archive" in 38 | *\.tgz | *\.tar.gz) 39 | cd "${_DIR}" && tar -xzf "$2" 40 | ;; 41 | *\.zip) 42 | cd "${_DIR}" && unzip "$2" 43 | ;; 44 | esac 45 | rm -rf "$local_archive" 46 | fi 47 | } 48 | 49 | # Downloads any application given a URL 50 | ## Arg1 - Remote URL 51 | ## Arg2 - Local file name 52 | download_app() { 53 | local remote_url="$1" 54 | local local_name="$2" 55 | 56 | # setup `curl` and `wget` options 57 | local curl_opts="--progress-bar -L" 58 | local wget_opts="--progress=bar:force" 59 | 60 | # checks if we already have the given application 61 | # checks if we have curl installed 62 | # downloads application 63 | [ ! -f "${local_name}" ] && [ $(command -v curl) ] && \ 64 | echo "exec: curl ${curl_opts} ${remote_url}" 1>&2 && \ 65 | curl ${curl_opts} "${remote_url}" > "${local_name}" 66 | # if the file still doesn't exist, let's try `wget` and cross our fingers 67 | [ !
-f "${local_name}" ] && [ $(command -v wget) ] && \ 68 | echo "exec: wget ${wget_opts} ${remote_url}" 1>&2 && \ 69 | wget ${wget_opts} -O "${local_name}" "${remote_url}" 70 | # if both were unsuccessful, exit 71 | [ ! -f "${local_name}" ] && \ 72 | echo -n "ERROR: Cannot download $2 with cURL or wget; " && \ 73 | echo "please install manually and try again." && \ 74 | exit 2 75 | } 76 | 77 | # Determines the Spark version from the root pom.xml file and 78 | # installs Spark under the bin/ folder if needed. 79 | install_spark() { 80 | local spark_version=`grep "" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` 81 | local hadoop_version=`grep "" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` 82 | local apache_mirror=${APACHE_MIRROR:-"https://dlcdn.apache.org/spark/spark-${spark_version}"} 83 | 84 | install_app \ 85 | "${apache_mirror}" \ 86 | "spark-${spark_version}-bin-hadoop${hadoop_version}.tgz" \ 87 | "spark-${spark_version}-bin-hadoop${hadoop_version}/bin/spark-shell" 88 | 89 | SPARK_DIR="${_DIR}/spark-${spark_version}-bin-hadoop${hadoop_version}" 90 | SPARK_SUBMIT="${SPARK_DIR}/bin/spark-submit" 91 | } 92 | 93 | -------------------------------------------------------------------------------- /bin/launch-jupyter-lab.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # 21 | # Launch a JupyterLab 22 | 23 | FWDIR="$(cd "`dirname $0`"/..; pwd)" 24 | 25 | if [ -z "$CONDA_DISABLED" ]; then 26 | # Activate a conda virtual env 27 | . ${FWDIR}/bin/conda.sh && activate_conda_virtual_env "${FWDIR}" 28 | fi 29 | 30 | if [ ! -z "$JUPYTER_EXT_INIT" ]; then 31 | # Install Jupyter extensions 32 | jupyter labextension install jupyterlab-code-snippets 33 | jupyter labextension install @axlair/jupyterlab_vim 34 | # jupyter labextension install @krassowski/jupyterlab-lsp 35 | jupyter labextension install @kiteco/jupyterlab-kite 36 | # jupyter labextension install @lckr/jupyterlab_variableinspector 37 | jupyter lab clean 38 | jupyter lab build 39 | fi 40 | 41 | PYTHONPATH="${FWDIR}/python" JUPYTERLAB_SETTINGS_DIR=${FWDIR}/.jupyter jupyter lab 42 | -------------------------------------------------------------------------------- /bin/mypy.ini: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | [mypy] 17 | strict_optional = True 18 | no_implicit_optional = True 19 | disallow_untyped_defs = True 20 | ignore_missing_imports = True 21 | exclude = (python/tests/.*py$) 22 | -------------------------------------------------------------------------------- /bin/package.sh: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | get_package_variables_from_pom() { 19 | local _ROOTT_DIR="$1" 20 | 21 | PACKAGE_NAME=`grep "" "${_ROOTT_DIR}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` 22 | PACKAGE_VERSION=`grep "" "${_ROOTT_DIR}/pom.xml" | head -n2 | tail -n1 | awk -F '[<>]' '{print $3}'` 23 | SCALA_VERSION=`grep "" "${_ROOTT_DIR}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` 24 | SCALA_BINARY_VERSION=`grep "" "${_ROOTT_DIR}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` 25 | SPARK_VERSION=`grep "" "${_ROOTT_DIR}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` 26 | SPARK_BINARY_VERSION=`grep "" "${_ROOTT_DIR}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` 27 | PACKAGE_JAR_NAME="${PACKAGE_NAME}_${SCALA_BINARY_VERSION}_spark${SPARK_BINARY_VERSION}_${PACKAGE_VERSION}-with-dependencies.jar" 28 | } 29 | 30 | -------------------------------------------------------------------------------- /bin/python: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | set -e -o pipefail 21 | 22 | # Sets the root directory 23 | ROOT_DIR="$(cd "`dirname $0`"/..; pwd)" 24 | 25 | # Loads some variables from `pom.xml` 26 | . ${ROOT_DIR}/bin/package.sh && get_package_variables_from_pom "${ROOT_DIR}" 27 | 28 | # Splits input arguments into two parts: pyspark confs and args 29 | parse_args_for_pyspark() { 30 | SPARK_CONF=() 31 | ARGS=() 32 | while [ ! -z "$1" ]; do 33 | if [[ "$1" =~ ^--master= ]]; then 34 | SPARK_CONF+=($1) 35 | elif [ "$1" == "--conf" ]; then 36 | shift 37 | SPARK_CONF+=("--conf $1") 38 | else 39 | ARGS+=($1) 40 | fi 41 | shift 42 | done 43 | } 44 | 45 | find_package() { 46 | local _BUILT_PACKAGE="${ROOT_DIR}/target/${PACKAGE_JAR_NAME}" 47 | if [ -e "$_BUILT_PACKAGE" ]; then 48 | PACKAGE=$_BUILT_PACKAGE 49 | else 50 | PACKAGE="${ROOT_DIR}/assembly/${PACKAGE_JAR_NAME}" 51 | echo "${_BUILT_PACKAGE} not found, so use pre-compiled ${PACKAGE}" 1>&2 52 | fi 53 | } 54 | 55 | # Joins an input array by a given separator 56 | join_by() { 57 | local IFS="$1" 58 | shift 59 | echo "$*" 60 | } 61 | 62 | # Do some preparations before launching pyspark 63 | parse_args_for_pyspark "$@" 64 | find_package 65 | 66 | if [ -z "$CONDA_DISABLED" ]; then 67 | # Activate a conda virtual env 68 | . 
${ROOT_DIR}/bin/conda.sh && activate_conda_virtual_env "${ROOT_DIR}" 69 | fi 70 | 71 | # Then, launches a pyspark with given arguments 72 | if [ -z "${PYTHONPATH}" ]; then 73 | PYTHONPATH="${ROOT_DIR}/python" 74 | else 75 | PYTHONPATH="${ROOT_DIR}/python:${PYTHONPATH}" 76 | fi 77 | PYTHONPATH=${PYTHONPATH} \ 78 | PYTHONSTARTUP="${ROOT_DIR}/bin/.startup.py" \ 79 | exec pyspark --jars=${PACKAGE} $(join_by " " ${SPARK_CONF[@]}) $(join_by " " ${ARGS[@]}) 80 | 81 | -------------------------------------------------------------------------------- /bin/requirements.txt: -------------------------------------------------------------------------------- 1 | # Synced with ./bin/conda.yml 2 | pyspark==3.2.0 3 | pyarrow==1.0.0 4 | neo4j==4.4.1 5 | mypy==0.910 6 | flake8==3.8.4 7 | -------------------------------------------------------------------------------- /bin/run-tests: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | FWDIR="$(cd "`dirname $0`"/..; pwd)" 21 | cd "$FWDIR" 22 | 23 | if [ -z "$CONDA_DISABLED" ]; then 24 | # Activate a conda virtual env 25 | . 
./bin/conda.sh && activate_conda_virtual_env "${FWDIR}" 26 | fi 27 | 28 | PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 7, 0))') 29 | if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then 30 | echo "Python versions prior to 3.7 are not supported." 31 | exit -1 32 | fi 33 | 34 | # Static code analysis before running unit tests 35 | ./bin/lint-python || exit -1 36 | 37 | # Loads some variables from `pom.xml` 38 | . ./bin/package.sh && get_package_variables_from_pom "${FWDIR}" 39 | 40 | # Resolves a path of the test package 41 | PACKAGE_JAR="${FWDIR}/target/${PACKAGE_JAR_NAME}" 42 | if [ ! -e "$PACKAGE_JAR" ]; then 43 | echo "${PACKAGE_JAR} not found, so you need to run './build/mvn clean package' first" 1>&2 44 | exit -1 45 | fi 46 | 47 | exec python3 -u ./bin/run-tests.py \ 48 | --package=${PACKAGE_JAR} --module ${FWDIR}/python "$@" 49 | -------------------------------------------------------------------------------- /bin/spark-shell: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | # Sets the root directory 21 | ROOT_DIR="$(cd "`dirname $0`"/..; pwd)" 22 | 23 | if [ -z "${SPARK_HOME}" ]; then 24 | # Preserve the calling directory 25 | _CALLING_DIR="$(pwd)" 26 | 27 | # Install the proper version of Spark for this package 28 | . ${ROOT_DIR}/bin/install.sh 29 | install_spark 30 | 31 | # Reset the current working directory 32 | cd "${_CALLING_DIR}" 33 | else 34 | SPARK_DIR=${SPARK_HOME} 35 | fi 36 | 37 | # Loads some variables from `pom.xml` 38 | . ${ROOT_DIR}/bin/package.sh && get_package_variables_from_pom "${ROOT_DIR}" 39 | 40 | # Resolves the location of the plugin jar: the locally built one under `target/` if it exists, otherwise the pre-compiled one under `assembly/` 41 | find_resource() { 42 | local built_pkg="$ROOT_DIR/target/${PACKAGE_JAR_NAME}" 43 | if [ -e "$built_pkg" ]; then 44 | RESOURCE=$built_pkg 45 | else 46 | RESOURCE="$ROOT_DIR/assembly/${PACKAGE_JAR_NAME}" 47 | echo "${built_pkg} not found, so use pre-compiled ${RESOURCE}" 1>&2 48 | fi 49 | } 50 | 51 | echo "Using \`spark-shell\` from path: $SPARK_DIR" 1>&2 52 | find_resource && ${SPARK_DIR}/bin/spark-shell --conf spark.jars=${RESOURCE} "$@" 53 | 54 | -------------------------------------------------------------------------------- /bin/tox.ini: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | [pycodestyle] 17 | ignore=E226,E241,E305,E402,E722,E731,E741,W503,W504,W605 18 | max-line-length=120 19 | exclude=*/target/*,.git/*,bin/* 20 | 21 | [flake8] 22 | select = E901,E999,F821,F822,F823,F401,B006 23 | exclude=*/target/*,.git/*,bin/* 24 | max-line-length = 120 25 | -------------------------------------------------------------------------------- /md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maropu/spark-sql-flow-plugin/57d9db75d544706f1693d9a06a9202fc96e64b3e/md -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/flow/interfaces.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.spark.sql.flow 19 | 20 | import scala.collection.mutable 21 | 22 | object GraphNodeType extends Enumeration { 23 | val TableNode, ViewNode, PlanNode, LeafPlanNode, QueryNode = Value 24 | } 25 | 26 | case class SQLFlowGraphNode( 27 | uniqueId: String, 28 | ident: String, 29 | attributeNames: Seq[String], 30 | schemaDDL: String, 31 | tpe: GraphNodeType.Value, 32 | isCached: Boolean, 33 | props: mutable.Map[String, String] = mutable.Map.empty) { 34 | 35 | private def prettyTypeName(tpe: GraphNodeType.Value) = tpe match { 36 | case GraphNodeType.TableNode => "table" 37 | case GraphNodeType.ViewNode => "view" 38 | case GraphNodeType.PlanNode => "plan" 39 | case GraphNodeType.LeafPlanNode => "leaf_plan" 40 | case GraphNodeType.QueryNode => "query" 41 | } 42 | 43 | override def toString: String = { 44 | s"""name=`$ident`(${attributeNames.map(a => s"`$a`").mkString(",")}), """ + 45 | s"""type=${prettyTypeName(tpe)}, cached=$isCached""" 46 | } 47 | } 48 | 49 | case class SQLFlowGraphEdge( 50 | fromId: String, 51 | fromIdx: Option[Int], 52 | toId: String, 53 | toIdx: Option[Int]) { 54 | 55 | override def toString: String = { 56 | val fromIdxOpt = fromIdx.map(i => s"(idx=$i)").getOrElse("") 57 | val toIdxOpt = toIdx.map(i => s"(idx=$i)").getOrElse("") 58 | s"""from=`$fromId`$fromIdxOpt, to=`$toId`$toIdxOpt""" 59 | } 60 | } 61 | 62 | trait BaseGraphBatchSink { 63 | def write(nodes: Seq[SQLFlowGraphNode], edges: Seq[SQLFlowGraphEdge], 64 | options: Map[String, String]): Unit 65 | } 66 | 67 | trait BaseGraphStreamSink { 68 | def append(nodes: Seq[SQLFlowGraphNode], edges: Seq[SQLFlowGraphEdge], 69 | options: Map[String, String]): Unit 70 | } -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/flow/sink/MermaidSink.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or 
more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.flow.sink 19 | 20 | import java.io.File 21 | 22 | import org.apache.spark.sql.flow.{GraphNodeType, SQLFlowGraphEdge, SQLFlowGraphNode} 23 | 24 | /** 25 | * This class transforms an input graph into a Mermaid-formatted flowchart. 
26 | * - https://mermaid-js.github.io/mermaid/#/flowchart 27 | */ 28 | case class MermaidSink(imgFormat: String = "svg") extends GraphFileBatchSink { 29 | 30 | override def filenameSuffix: String = "mmd" 31 | 32 | private val className = { 33 | getClass.getCanonicalName 34 | } 35 | 36 | private def tryGenerateImageFile(options: Map[String, String]): Unit = { 37 | val dirPath = getOutputDirPathFrom(options) 38 | val filenamePrefix = getFilenamePrefixFrom(options) 39 | val mmdFile = new File(dirPath, s"$filenamePrefix.$filenameSuffix").getAbsolutePath 40 | val dstFile = new File(dirPath, s"$filenamePrefix.$imgFormat").getAbsolutePath 41 | val arguments = s"-i $mmdFile -o $dstFile" 42 | SinkUtils.tryToExecuteCommand("mmdc", arguments) 43 | } 44 | 45 | override def write( 46 | nodes: Seq[SQLFlowGraphNode], 47 | edges: Seq[SQLFlowGraphEdge], 48 | options: Map[String, String]): Unit = { 49 | super.write(nodes, edges, options) 50 | tryGenerateImageFile(options) 51 | } 52 | 53 | override def toGraphString(nodes: Seq[SQLFlowGraphNode], edges: Seq[SQLFlowGraphEdge]): String = { 54 | val nodeStrings = nodes.map { n => 55 | val nodeDesc = n.tpe match { 56 | case GraphNodeType.TableNode | GraphNodeType.ViewNode | GraphNodeType.QueryNode => 57 | val desc = s""""${n.ident}(${n.attributeNames.mkString(",")})"""" 58 | s"[[$desc]]" 59 | case _ => 60 | s"(${n.ident})" 61 | } 62 | s" ${n.uniqueId}$nodeDesc" 63 | } 64 | val compactEdges = edges.map { e => (e.fromId, e.toId) }.distinct 65 | val edgeStrings = compactEdges.map { case (fromId, toId) => 66 | s""" $fromId-->$toId""" 67 | } 68 | s""" 69 | |%% Automatically generated by $className 70 | |flowchart LR 71 | |${nodeStrings.mkString("\n")} 72 | |${edgeStrings.mkString("\n")} 73 | """.stripMargin 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed 
to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the file target/unit-tests.log 19 | log4j.rootLogger=INFO, CA, FA 20 | 21 | #Console Appender 22 | log4j.appender.CA=org.apache.log4j.ConsoleAppender 23 | log4j.appender.CA.layout=org.apache.log4j.PatternLayout 24 | log4j.appender.CA.layout.ConversionPattern=%d{HH:mm:ss.SSS} %p %c: %m%n 25 | log4j.appender.CA.Threshold = WARN 26 | log4j.appender.CA.follow = true 27 | 28 | 29 | #File Appender 30 | log4j.appender.FA=org.apache.log4j.FileAppender 31 | log4j.appender.FA.append=false 32 | log4j.appender.FA.file=target/unit-tests.log 33 | log4j.appender.FA.layout=org.apache.log4j.PatternLayout 34 | log4j.appender.FA.layout.ConversionPattern=%d{HH:mm:ss.SSS} %t %p %c{1}: %m%n 35 | 36 | # Set the threshold of the File Appender to INFO 37 | log4j.appender.FA.Threshold = INFO 38 | 39 | # Some packages are noisy for no good reason.
40 | log4j.additivity.org.apache.parquet.hadoop.ParquetRecordReader=false 41 | log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF 42 | 43 | log4j.additivity.org.apache.parquet.hadoop.ParquetOutputCommitter=false 44 | log4j.logger.org.apache.parquet.hadoop.ParquetOutputCommitter=OFF 45 | 46 | log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false 47 | log4j.logger.org.apache.hadoop.hive.serde2.lazy.LazyStruct=OFF 48 | 49 | log4j.additivity.org.apache.hadoop.hive.metastore.RetryingHMSHandler=false 50 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=OFF 51 | 52 | log4j.additivity.hive.ql.metadata.Hive=false 53 | log4j.logger.hive.ql.metadata.Hive=OFF 54 | 55 | # Parquet related logging 56 | log4j.logger.org.apache.parquet.hadoop=WARN 57 | log4j.logger.org.apache.spark.sql.parquet=INFO 58 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/inputs/basics.sql: -------------------------------------------------------------------------------- 1 | -- Define Test input views 2 | CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 1), (1, 2) AS t1(a, b); 3 | CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1, 1), (1, 2) AS t2(a, b); 4 | 5 | -- Defines views for lineage tests 6 | CREATE OR REPLACE TEMPORARY VIEW t3 AS 7 | SELECT key AS a, sum(CAST(value AS INT)) AS b FROM testdata 8 | GROUP BY key HAVING b % 2 = 0; 9 | 10 | CREATE OR REPLACE TEMPORARY VIEW t4 AS 11 | SELECT a, b FROM t1 12 | UNION ALL 13 | SELECT a, b FROM t2 14 | UNION ALL 15 | SELECT a, b FROM t3; 16 | 17 | CREATE OR REPLACE TEMPORARY VIEW t5 AS 18 | SELECT a AS c FROM t2 WHERE b % 3 = 0 ORDER BY c; 19 | 20 | CREATE OR REPLACE TEMPORARY VIEW t6 AS 21 | SELECT t4.a, t3.b FROM t4 LEFT OUTER JOIN t3 ON t4.a = t3.a; 22 | 23 | -- LIMIT 24 | CREATE OR REPLACE TEMPORARY VIEW t7 AS 25 | SELECT * FROM testdata LIMIT 1; 26 | CREATE OR REPLACE TEMPORARY VIEW t8 AS 27 | SELECT * FROM 
(SELECT * FROM range(10) LIMIT 1) WHERE id > 3; 28 | 29 | -- Example queries in README.md 30 | CREATE OR REPLACE TEMPORARY VIEW v1 AS 31 | SELECT key, SUM(value) s FROM testdata GROUP BY key HAVING s > 100; 32 | 33 | CACHE TABLE v1; 34 | 35 | CREATE TEMPORARY VIEW v2 AS 36 | SELECT testdata.*, v1.s FROM testdata 37 | LEFT OUTER JOIN v1 ON testdata.key = v1.key; 38 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/inputs/group-analytics.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES 2 | (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) 3 | AS testData(a, b); 4 | 5 | -- CUBE on overlapping columns 6 | CREATE OR REPLACE TEMPORARY VIEW t1 AS 7 | SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE; 8 | 9 | CREATE OR REPLACE TEMPORARY VIEW t2 AS 10 | SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE; 11 | 12 | -- ROLLUP on overlapping columns 13 | CREATE OR REPLACE TEMPORARY VIEW t3 AS 14 | SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP; 15 | 16 | CREATE OR REPLACE TEMPORARY VIEW t4 AS 17 | SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP; 18 | 19 | CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES 20 | ("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) 21 | AS courseSales(course, year, earnings); 22 | 23 | -- ROLLUP 24 | CREATE OR REPLACE TEMPORARY VIEW t5 AS 25 | SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year; 26 | 27 | -- CUBE 28 | CREATE OR REPLACE TEMPORARY VIEW t6 AS 29 | SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year; 30 | 31 | -- GROUPING SETS 32 | CREATE OR REPLACE TEMPORARY VIEW t7 AS 33 | SELECT course, year, SUM(earnings) FROM 
courseSales GROUP BY course, year GROUPING SETS(course, year); 34 | CREATE OR REPLACE TEMPORARY VIEW t8 AS 35 | SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course); 36 | CREATE OR REPLACE TEMPORARY VIEW t9 AS 37 | SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year); 38 | 39 | -- GROUPING SETS with aggregate functions containing groupBy columns 40 | CREATE OR REPLACE TEMPORARY VIEW t10 AS 41 | SELECT course, SUM(earnings) AS sum FROM courseSales 42 | GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum; 43 | CREATE OR REPLACE TEMPORARY VIEW t11 AS 44 | SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales 45 | GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum; 46 | 47 | -- GROUPING/GROUPING_ID 48 | CREATE OR REPLACE TEMPORARY VIEW t12 AS 49 | SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales 50 | GROUP BY CUBE(course, year); 51 | CREATE OR REPLACE TEMPORARY VIEW t13 AS 52 | SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year; 53 | 54 | -- GROUPING/GROUPING_ID in having clause 55 | CREATE OR REPLACE TEMPORARY VIEW t14 AS 56 | SELECT course, year FROM courseSales GROUP BY CUBE(course, year) 57 | HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year; 58 | 59 | -- GROUPING/GROUPING_ID in orderBy clause 60 | CREATE OR REPLACE TEMPORARY VIEW t15 AS 61 | SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year) 62 | ORDER BY GROUPING(course), GROUPING(year), course, year; 63 | CREATE OR REPLACE TEMPORARY VIEW t16 AS 64 | SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year) 65 | ORDER BY GROUPING(course), GROUPING(year), course, year; 66 | CREATE OR REPLACE 
TEMPORARY VIEW t17 AS 67 | SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year; 68 | 69 | -- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS 70 | CREATE OR REPLACE TEMPORARY VIEW t18 AS 71 | SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2); 72 | CREATE OR REPLACE TEMPORARY VIEW t19 AS 73 | SELECT a + b AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b); 74 | CREATE OR REPLACE TEMPORARY VIEW t20 AS 75 | SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k) 76 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/inputs/in-subquery.sql: -------------------------------------------------------------------------------- 1 | create temporary view tab_a as select * from values (1, 1) as tab_a(a1, b1); 2 | create temporary view tab_b as select * from values (1, 1) as tab_b(a2, b2); 3 | create temporary view struct_tab as select struct(col1 as a, col2 as b) as record from 4 | values (1, 1), (1, 2), (2, 1), (2, 2); 5 | 6 | CREATE OR REPLACE TEMPORARY VIEW t1 AS 7 | select 1 from tab_a where (a1, b1) not in (select a2, b2 from tab_b); 8 | 9 | -- Aliasing is needed as a workaround for SPARK-24443 10 | CREATE OR REPLACE TEMPORARY VIEW t2 AS 11 | select count(*) from struct_tab where record in 12 | (select (a2 as a, b2 as b) from tab_b); 13 | CREATE OR REPLACE TEMPORARY VIEW t3 AS 14 | select count(*) from struct_tab where record not in 15 | (select (a2 as a, b2 as b) from tab_b); 16 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/inputs/joins.sql: -------------------------------------------------------------------------------- 1 | -- Define Test input views 2 | CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 1), (1, 2) AS t1(k, v1); 3 | CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1, 1), (1, 2) 
AS t2(k, v2); 4 | 5 | CREATE OR REPLACE TEMPORARY VIEW t3 AS 6 | SELECT t1.* FROM t1 NATURAL JOIN t2; 7 | 8 | CREATE OR REPLACE TEMPORARY VIEW t4 AS 9 | SELECT t1.* FROM t1 NATURAL LEFT JOIN t2 ORDER BY v1, v2; 10 | 11 | CREATE OR REPLACE TEMPORARY VIEW t5 AS 12 | SELECT t1.* FROM t1 NATURAL RIGHT JOIN t2; 13 | 14 | CREATE OR REPLACE TEMPORARY VIEW t6 AS 15 | SELECT t1.* FROM t1 NATURAL RIGHT JOIN t2 WHERE v1 > 0; 16 | 17 | CREATE OR REPLACE TEMPORARY VIEW t7 AS 18 | SELECT t1.* FROM t1 NATURAL FULL OUTER JOIN t2; 19 | 20 | CREATE OR REPLACE TEMPORARY VIEW t8 AS 21 | SELECT t1.* FROM t1 INNER JOIN t2 ON t1.k = t2.k; 22 | 23 | CREATE OR REPLACE TEMPORARY VIEW t9 AS 24 | SELECT t1.* FROM t1 LEFT OUTER JOIN t2 ON t1.k = t2.k; 25 | 26 | CREATE OR REPLACE TEMPORARY VIEW t10 AS 27 | SELECT t1.* FROM t1 RIGHT OUTER JOIN t2 ON t1.k = t2.k; 28 | 29 | CREATE OR REPLACE TEMPORARY VIEW t11 AS 30 | SELECT t1.* FROM t1 FULL OUTER JOIN t2 ON t1.k = t2.k; 31 | 32 | CREATE OR REPLACE TEMPORARY VIEW t12 AS 33 | SELECT t1.* FROM t1 SEMI JOIN t2 ON t1.k = t2.k; 34 | 35 | CREATE OR REPLACE TEMPORARY VIEW t13 AS 36 | SELECT t1.* FROM t1 ANTI JOIN t2 ON t1.k = t2.k; 37 | 38 | CREATE OR REPLACE TEMPORARY VIEW t13 AS 39 | SELECT t1.* FROM t1 CROSS JOIN t2; 40 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/inputs/set-operations.sql: -------------------------------------------------------------------------------- 1 | -- Define Test input views 2 | CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 1), (1, 2) AS t1(a, b); 3 | CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1, 1), (1, 2) AS t2(a, b); 4 | 5 | CREATE OR REPLACE TEMPORARY VIEW t3 AS 6 | SELECT b FROM t1 7 | UNION 8 | SELECT a FROM t2; 9 | 10 | CREATE OR REPLACE TEMPORARY VIEW t4 AS 11 | SELECT a FROM t1 12 | INTERSECT 13 | SELECT b FROM t2; 14 | 15 | CREATE OR REPLACE TEMPORARY VIEW t5 AS 16 | SELECT a FROM t1 17 | EXCEPT 18 | SELECT 
a FROM t2; 19 | 20 | CREATE OR REPLACE TEMPORARY VIEW t6 AS 21 | SELECT * FROM t3 22 | UNION ALL 23 | SELECT * FROM t4 24 | UNION ALL 25 | SELECT * FROM t5; 26 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/inputs/subquery-in-from.sql: -------------------------------------------------------------------------------- 1 | -- Aliased subqueries in FROM clause 2 | CREATE OR REPLACE TEMPORARY VIEW t1 AS 3 | SELECT * FROM (SELECT * FROM testData) AS t WHERE key = 1; 4 | 5 | CREATE OR REPLACE TEMPORARY VIEW t2 AS 6 | FROM (SELECT * FROM testData WHERE key = 1) AS t SELECT *; 7 | 8 | -- Optional `AS` keyword 9 | CREATE OR REPLACE TEMPORARY VIEW t3 AS 10 | SELECT * FROM (SELECT * FROM testData) t WHERE key = 1; 11 | 12 | CREATE OR REPLACE TEMPORARY VIEW t4 AS 13 | FROM (SELECT * FROM testData WHERE key = 1) t SELECT *; 14 | 15 | -- Disallow unaliased subqueries in FROM clause 16 | CREATE OR REPLACE TEMPORARY VIEW t5 AS 17 | SELECT * FROM (SELECT * FROM testData) WHERE key = 1; 18 | 19 | CREATE OR REPLACE TEMPORARY VIEW t6 AS 20 | FROM (SELECT * FROM testData WHERE key = 1) SELECT *; 21 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/results/in-subquery-contracted.sql.dot: -------------------------------------------------------------------------------- 1 | // Automatically generated by SQLFlowTestSuite 2 | 3 | 4 | digraph { 5 | graph [pad="0.5" nodesep="0.5" ranksep="1" fontname="Helvetica" rankdir=LR]; 6 | node [shape=plaintext] 7 | 8 | 9 | "LocalRelation_6ba95df" [color="black" label=< 10 | 11 | 12 | 13 | 14 |
LocalRelation
col1
col2
>]; 15 | 16 | 17 | "struct_tab" [color="black" label=< 18 | 19 | 20 | 21 |
struct_tab
record
>]; 22 | 23 | 24 | "t1" [color="black" label=< 25 | 26 | 27 | 28 |
t1
1
>]; 29 | 30 | 31 | "t2" [color="black" label=< 32 | 33 | 34 | 35 |
t2
count(1)
>]; 36 | 37 | 38 | "t3" [color="black" label=< 39 | 40 | 41 | 42 |
t3
count(1)
>]; 43 | 44 | 45 | "tab_a" [color="black" label=< 46 | 47 | 48 | 49 | 50 |
tab_a
a1
b1
>]; 51 | 52 | 53 | "tab_a_679e602" [color="black" label=< 54 | 55 | 56 | 57 | 58 |
tab_a_679e602
a1
b1
>]; 59 | 60 | 61 | "tab_b" [color="black" label=< 62 | 63 | 64 | 65 | 66 |
tab_b
a2
b2
>]; 67 | 68 | 69 | "tab_b_9c908e2" [color="black" label=< 70 | 71 | 72 | 73 | 74 |
tab_b_9c908e2
a2
b2
>]; 75 | 76 | "LocalRelation_6ba95df":0 -> "struct_tab":0; 77 | "LocalRelation_6ba95df":1 -> "struct_tab":0; 78 | "struct_tab":0 -> "t2":nodeName; 79 | "struct_tab":0 -> "t3":nodeName; 80 | "tab_a":0 -> "t1":nodeName; 81 | "tab_a":1 -> "t1":nodeName; 82 | "tab_a_679e602":0 -> "tab_a":0; 83 | "tab_a_679e602":1 -> "tab_a":1; 84 | "tab_b":0 -> "t1":nodeName; 85 | "tab_b":0 -> "t2":nodeName; 86 | "tab_b":0 -> "t3":nodeName; 87 | "tab_b":1 -> "t1":nodeName; 88 | "tab_b":1 -> "t2":nodeName; 89 | "tab_b":1 -> "t3":nodeName; 90 | "tab_b_9c908e2":0 -> "tab_b":0; 91 | "tab_b_9c908e2":1 -> "tab_b":1; 92 | } 93 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/results/set-operations-contracted.sql.dot: -------------------------------------------------------------------------------- 1 | // Automatically generated by SQLFlowTestSuite 2 | 3 | 4 | digraph { 5 | graph [pad="0.5" nodesep="0.5" ranksep="1" fontname="Helvetica" rankdir=LR]; 6 | node [shape=plaintext] 7 | 8 | 9 | "t1" [color="black" label=< 10 | 11 | 12 | 13 | 14 |
t1
a
b
>]; 15 | 16 | 17 | "t1_af315d3" [color="black" label=< 18 | 19 | 20 | 21 | 22 |
t1_af315d3
a
b
>]; 23 | 24 | 25 | "t2" [color="black" label=< 26 | 27 | 28 | 29 | 30 |
t2
a
b
>]; 31 | 32 | 33 | "t2_3e956ad" [color="black" label=< 34 | 35 | 36 | 37 | 38 |
t2_3e956ad
a
b
>]; 39 | 40 | 41 | "t3" [color="black" label=< 42 | 43 | 44 | 45 |
t3
b
>]; 46 | 47 | 48 | "t4" [color="black" label=< 49 | 50 | 51 | 52 |
t4
a
>]; 53 | 54 | 55 | "t5" [color="black" label=< 56 | 57 | 58 | 59 |
t5
a
>]; 60 | 61 | 62 | "t6" [color="black" label=< 63 | 64 | 65 | 66 |
t6
b
>]; 67 | 68 | "t1":0 -> "t4":0; 69 | "t1":0 -> "t5":0; 70 | "t1":1 -> "t3":0; 71 | "t1_af315d3":0 -> "t1":0; 72 | "t1_af315d3":1 -> "t1":1; 73 | "t2":0 -> "t3":0; 74 | "t2":0 -> "t5":0; 75 | "t2":1 -> "t4":0; 76 | "t2_3e956ad":0 -> "t2":0; 77 | "t2_3e956ad":1 -> "t2":1; 78 | "t3":0 -> "t6":0; 79 | "t4":0 -> "t6":0; 80 | "t5":0 -> "t6":0; 81 | } 82 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/results/subquery-in-from-contracted.sql.dot: -------------------------------------------------------------------------------- 1 | // Automatically generated by SQLFlowTestSuite 2 | 3 | 4 | digraph { 5 | graph [pad="0.5" nodesep="0.5" ranksep="1" fontname="Helvetica" rankdir=LR]; 6 | node [shape=plaintext] 7 | 8 | 9 | "default.testdata" [color="black" label=< 10 | 11 | 12 | 13 | 14 |
default.testdata
key
value
>]; 15 | 16 | 17 | "t1" [color="black" label=< 18 | 19 | 20 | 21 | 22 |
t1
key
value
>]; 23 | 24 | 25 | "t2" [color="black" label=< 26 | 27 | 28 | 29 | 30 |
t2
key
value
>]; 31 | 32 | 33 | "t3" [color="black" label=< 34 | 35 | 36 | 37 | 38 |
t3
key
value
>]; 39 | 40 | 41 | "t4" [color="black" label=< 42 | 43 | 44 | 45 | 46 |
t4
key
value
>]; 47 | 48 | 49 | "t5" [color="black" label=< 50 | 51 | 52 | 53 | 54 |
t5
key
value
>]; 55 | 56 | 57 | "t6" [color="black" label=< 58 | 59 | 60 | 61 | 62 |
t6
key
value
>]; 63 | 64 | "default.testdata":0 -> "t2":0; 65 | "default.testdata":0 -> "t5":0; 66 | "default.testdata":1 -> "t2":1; 67 | "default.testdata":1 -> "t5":1; 68 | "t2":0 -> "t4":0; 69 | "t2":0 -> "t6":0; 70 | "t2":1 -> "t4":1; 71 | "t2":1 -> "t6":1; 72 | "t5":0 -> "t1":0; 73 | "t5":0 -> "t3":0; 74 | "t5":1 -> "t1":1; 75 | "t5":1 -> "t3":1; 76 | } 77 | -------------------------------------------------------------------------------- /src/test/resources/sql-flow-tests/results/subquery-in-from.sql.dot: -------------------------------------------------------------------------------- 1 | // Automatically generated by SQLFlowTestSuite 2 | 3 | 4 | digraph { 5 | graph [pad="0.5" nodesep="0.5" ranksep="1" fontname="Helvetica" rankdir=LR]; 6 | node [shape=plaintext] 7 | 8 | 9 | "Filter_31acfe3" [label=< 10 | 11 | 12 | 13 | 14 |
Filter
key
value
>]; 15 | 16 | 17 | "Filter_729fc33" [label=< 18 | 19 | 20 | 21 | 22 |
Filter
key
value
>]; 23 | 24 | 25 | "default.testdata" [color="black" label=< 26 | 27 | 28 | 29 | 30 |
default.testdata
key
value
>]; 31 | 32 | 33 | "t1" [color="black" label=< 34 | 35 | 36 | 37 | 38 |
t1
key
value
>]; 39 | 40 | 41 | "t2" [color="black" label=< 42 | 43 | 44 | 45 | 46 |
t2
key
value
>]; 47 | 48 | 49 | "t3" [color="black" label=< 50 | 51 | 52 | 53 | 54 |
t3
key
value
>]; 55 | 56 | 57 | "t4" [color="black" label=< 58 | 59 | 60 | 61 | 62 |
t4
key
value
>]; 63 | 64 | 65 | "t5" [color="black" label=< 66 | 67 | 68 | 69 | 70 |
t5
key
value
>]; 71 | 72 | 73 | "t6" [color="black" label=< 74 | 75 | 76 | 77 | 78 |
t6
key
value
>]; 79 | 80 | "Filter_31acfe3":0 -> "t5":0; 81 | "Filter_31acfe3":1 -> "t5":1; 82 | "Filter_729fc33":0 -> "t2":0; 83 | "Filter_729fc33":1 -> "t2":1; 84 | "default.testdata":0 -> "Filter_31acfe3":0; 85 | "default.testdata":0 -> "Filter_729fc33":0; 86 | "default.testdata":1 -> "Filter_31acfe3":1; 87 | "default.testdata":1 -> "Filter_729fc33":1; 88 | "t2":0 -> "t4":0; 89 | "t2":0 -> "t6":0; 90 | "t2":1 -> "t4":1; 91 | "t2":1 -> "t6":1; 92 | "t5":0 -> "t1":0; 93 | "t5":0 -> "t3":0; 94 | "t5":1 -> "t1":1; 95 | "t5":1 -> "t3":1; 96 | } 97 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q1.sql: -------------------------------------------------------------------------------- 1 | WITH customer_total_return AS 2 | ( SELECT 3 | sr_customer_sk AS ctr_customer_sk, 4 | sr_store_sk AS ctr_store_sk, 5 | sum(sr_return_amt) AS ctr_total_return 6 | FROM store_returns, date_dim 7 | WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000 8 | GROUP BY sr_customer_sk, sr_store_sk) 9 | SELECT c_customer_id 10 | FROM customer_total_return ctr1, store, customer 11 | WHERE ctr1.ctr_total_return > 12 | (SELECT avg(ctr_total_return) * 1.2 13 | FROM customer_total_return ctr2 14 | WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) 15 | AND s_store_sk = ctr1.ctr_store_sk 16 | AND s_state = 'TN' 17 | AND ctr1.ctr_customer_sk = c_customer_sk 18 | ORDER BY c_customer_id 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q10.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cd_gender, 3 | cd_marital_status, 4 | cd_education_status, 5 | count(*) cnt1, 6 | cd_purchase_estimate, 7 | count(*) cnt2, 8 | cd_credit_rating, 9 | count(*) cnt3, 10 | cd_dep_count, 11 | count(*) cnt4, 12 | cd_dep_employed_count, 13 | count(*) cnt5, 14 | cd_dep_college_count, 15 | count(*) cnt6 16 | FROM 17 | 
customer c, customer_address ca, customer_demographics 18 | WHERE 19 | c.c_current_addr_sk = ca.ca_address_sk AND 20 | ca_county IN ('Rush County', 'Toole County', 'Jefferson County', 21 | 'Dona Ana County', 'La Porte County') AND 22 | cd_demo_sk = c.c_current_cdemo_sk AND 23 | exists(SELECT * 24 | FROM store_sales, date_dim 25 | WHERE c.c_customer_sk = ss_customer_sk AND 26 | ss_sold_date_sk = d_date_sk AND 27 | d_year = 2002 AND 28 | d_moy BETWEEN 1 AND 1 + 3) AND 29 | (exists(SELECT * 30 | FROM web_sales, date_dim 31 | WHERE c.c_customer_sk = ws_bill_customer_sk AND 32 | ws_sold_date_sk = d_date_sk AND 33 | d_year = 2002 AND 34 | d_moy BETWEEN 1 AND 1 + 3) OR 35 | exists(SELECT * 36 | FROM catalog_sales, date_dim 37 | WHERE c.c_customer_sk = cs_ship_customer_sk AND 38 | cs_sold_date_sk = d_date_sk AND 39 | d_year = 2002 AND 40 | d_moy BETWEEN 1 AND 1 + 3)) 41 | GROUP BY cd_gender, 42 | cd_marital_status, 43 | cd_education_status, 44 | cd_purchase_estimate, 45 | cd_credit_rating, 46 | cd_dep_count, 47 | cd_dep_employed_count, 48 | cd_dep_college_count 49 | ORDER BY cd_gender, 50 | cd_marital_status, 51 | cd_education_status, 52 | cd_purchase_estimate, 53 | cd_credit_rating, 54 | cd_dep_count, 55 | cd_dep_employed_count, 56 | cd_dep_college_count 57 | LIMIT 100 58 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q11.sql: -------------------------------------------------------------------------------- 1 | WITH year_total AS ( 2 | SELECT 3 | c_customer_id customer_id, 4 | c_first_name customer_first_name, 5 | c_last_name customer_last_name, 6 | c_preferred_cust_flag customer_preferred_cust_flag, 7 | c_birth_country customer_birth_country, 8 | c_login customer_login, 9 | c_email_address customer_email_address, 10 | d_year dyear, 11 | sum(ss_ext_list_price - ss_ext_discount_amt) year_total, 12 | 's' sale_type 13 | FROM customer, store_sales, date_dim 14 | WHERE c_customer_sk = ss_customer_sk 
15 | AND ss_sold_date_sk = d_date_sk 16 | GROUP BY c_customer_id 17 | , c_first_name 18 | , c_last_name 19 | , d_year 20 | , c_preferred_cust_flag 21 | , c_birth_country 22 | , c_login 23 | , c_email_address 24 | , d_year 25 | UNION ALL 26 | SELECT 27 | c_customer_id customer_id, 28 | c_first_name customer_first_name, 29 | c_last_name customer_last_name, 30 | c_preferred_cust_flag customer_preferred_cust_flag, 31 | c_birth_country customer_birth_country, 32 | c_login customer_login, 33 | c_email_address customer_email_address, 34 | d_year dyear, 35 | sum(ws_ext_list_price - ws_ext_discount_amt) year_total, 36 | 'w' sale_type 37 | FROM customer, web_sales, date_dim 38 | WHERE c_customer_sk = ws_bill_customer_sk 39 | AND ws_sold_date_sk = d_date_sk 40 | GROUP BY 41 | c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, 42 | c_login, c_email_address, d_year) 43 | SELECT t_s_secyear.customer_preferred_cust_flag 44 | FROM year_total t_s_firstyear 45 | , year_total t_s_secyear 46 | , year_total t_w_firstyear 47 | , year_total t_w_secyear 48 | WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id 49 | AND t_s_firstyear.customer_id = t_w_secyear.customer_id 50 | AND t_s_firstyear.customer_id = t_w_firstyear.customer_id 51 | AND t_s_firstyear.sale_type = 's' 52 | AND t_w_firstyear.sale_type = 'w' 53 | AND t_s_secyear.sale_type = 's' 54 | AND t_w_secyear.sale_type = 'w' 55 | AND t_s_firstyear.dyear = 2001 56 | AND t_s_secyear.dyear = 2001 + 1 57 | AND t_w_firstyear.dyear = 2001 58 | AND t_w_secyear.dyear = 2001 + 1 59 | AND t_s_firstyear.year_total > 0 60 | AND t_w_firstyear.year_total > 0 61 | AND CASE WHEN t_w_firstyear.year_total > 0 62 | THEN t_w_secyear.year_total / t_w_firstyear.year_total 63 | ELSE NULL END 64 | > CASE WHEN t_s_firstyear.year_total > 0 65 | THEN t_s_secyear.year_total / t_s_firstyear.year_total 66 | ELSE NULL END 67 | ORDER BY t_s_secyear.customer_preferred_cust_flag 68 | LIMIT 100 69 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q12.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_desc, 3 | i_category, 4 | i_class, 5 | i_current_price, 6 | sum(ws_ext_sales_price) AS itemrevenue, 7 | sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) 8 | OVER 9 | (PARTITION BY i_class) AS revenueratio 10 | FROM 11 | web_sales, item, date_dim 12 | WHERE 13 | ws_item_sk = i_item_sk 14 | AND i_category IN ('Sports', 'Books', 'Home') 15 | AND ws_sold_date_sk = d_date_sk 16 | AND d_date BETWEEN cast('1999-02-22' AS DATE) 17 | AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) 18 | GROUP BY 19 | i_item_id, i_item_desc, i_category, i_class, i_current_price 20 | ORDER BY 21 | i_category, i_class, i_item_id, i_item_desc, revenueratio 22 | LIMIT 100 23 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q13.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | avg(ss_quantity), 3 | avg(ss_ext_sales_price), 4 | avg(ss_ext_wholesale_cost), 5 | sum(ss_ext_wholesale_cost) 6 | FROM store_sales 7 | , store 8 | , customer_demographics 9 | , household_demographics 10 | , customer_address 11 | , date_dim 12 | WHERE s_store_sk = ss_store_sk 13 | AND ss_sold_date_sk = d_date_sk AND d_year = 2001 14 | AND ((ss_hdemo_sk = hd_demo_sk 15 | AND cd_demo_sk = ss_cdemo_sk 16 | AND cd_marital_status = 'M' 17 | AND cd_education_status = 'Advanced Degree' 18 | AND ss_sales_price BETWEEN 100.00 AND 150.00 19 | AND hd_dep_count = 3 20 | ) OR 21 | (ss_hdemo_sk = hd_demo_sk 22 | AND cd_demo_sk = ss_cdemo_sk 23 | AND cd_marital_status = 'S' 24 | AND cd_education_status = 'College' 25 | AND ss_sales_price BETWEEN 50.00 AND 100.00 26 | AND hd_dep_count = 1 27 | ) OR 28 | (ss_hdemo_sk = hd_demo_sk 29 | AND cd_demo_sk = ss_cdemo_sk 30 | 
AND cd_marital_status = 'W' 31 | AND cd_education_status = '2 yr Degree' 32 | AND ss_sales_price BETWEEN 150.00 AND 200.00 33 | AND hd_dep_count = 1 34 | )) 35 | AND ((ss_addr_sk = ca_address_sk 36 | AND ca_country = 'United States' 37 | AND ca_state IN ('TX', 'OH', 'TX') 38 | AND ss_net_profit BETWEEN 100 AND 200 39 | ) OR 40 | (ss_addr_sk = ca_address_sk 41 | AND ca_country = 'United States' 42 | AND ca_state IN ('OR', 'NM', 'KY') 43 | AND ss_net_profit BETWEEN 150 AND 300 44 | ) OR 45 | (ss_addr_sk = ca_address_sk 46 | AND ca_country = 'United States' 47 | AND ca_state IN ('VA', 'TX', 'MS') 48 | AND ss_net_profit BETWEEN 50 AND 250 49 | )) 50 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q14b.sql: -------------------------------------------------------------------------------- 1 | WITH cross_items AS 2 | (SELECT i_item_sk ss_item_sk 3 | FROM item, 4 | (SELECT 5 | iss.i_brand_id brand_id, 6 | iss.i_class_id class_id, 7 | iss.i_category_id category_id 8 | FROM store_sales, item iss, date_dim d1 9 | WHERE ss_item_sk = iss.i_item_sk 10 | AND ss_sold_date_sk = d1.d_date_sk 11 | AND d1.d_year BETWEEN 1999 AND 1999 + 2 12 | INTERSECT 13 | SELECT 14 | ics.i_brand_id, 15 | ics.i_class_id, 16 | ics.i_category_id 17 | FROM catalog_sales, item ics, date_dim d2 18 | WHERE cs_item_sk = ics.i_item_sk 19 | AND cs_sold_date_sk = d2.d_date_sk 20 | AND d2.d_year BETWEEN 1999 AND 1999 + 2 21 | INTERSECT 22 | SELECT 23 | iws.i_brand_id, 24 | iws.i_class_id, 25 | iws.i_category_id 26 | FROM web_sales, item iws, date_dim d3 27 | WHERE ws_item_sk = iws.i_item_sk 28 | AND ws_sold_date_sk = d3.d_date_sk 29 | AND d3.d_year BETWEEN 1999 AND 1999 + 2) x 30 | WHERE i_brand_id = brand_id 31 | AND i_class_id = class_id 32 | AND i_category_id = category_id 33 | ), 34 | avg_sales AS 35 | (SELECT avg(quantity * list_price) average_sales 36 | FROM (SELECT 37 | ss_quantity quantity, 38 | ss_list_price list_price 39 | 
FROM store_sales, date_dim 40 | WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 41 | UNION ALL 42 | SELECT 43 | cs_quantity quantity, 44 | cs_list_price list_price 45 | FROM catalog_sales, date_dim 46 | WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 47 | UNION ALL 48 | SELECT 49 | ws_quantity quantity, 50 | ws_list_price list_price 51 | FROM web_sales, date_dim 52 | WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x) 53 | SELECT * 54 | FROM 55 | (SELECT 56 | 'store' channel, 57 | i_brand_id, 58 | i_class_id, 59 | i_category_id, 60 | sum(ss_quantity * ss_list_price) sales, 61 | count(*) number_sales 62 | FROM store_sales, item, date_dim 63 | WHERE ss_item_sk IN (SELECT ss_item_sk 64 | FROM cross_items) 65 | AND ss_item_sk = i_item_sk 66 | AND ss_sold_date_sk = d_date_sk 67 | AND d_week_seq = (SELECT d_week_seq 68 | FROM date_dim 69 | WHERE d_year = 1999 + 1 AND d_moy = 12 AND d_dom = 11) 70 | GROUP BY i_brand_id, i_class_id, i_category_id 71 | HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales 72 | FROM avg_sales)) this_year, 73 | (SELECT 74 | 'store' channel, 75 | i_brand_id, 76 | i_class_id, 77 | i_category_id, 78 | sum(ss_quantity * ss_list_price) sales, 79 | count(*) number_sales 80 | FROM store_sales, item, date_dim 81 | WHERE ss_item_sk IN (SELECT ss_item_sk 82 | FROM cross_items) 83 | AND ss_item_sk = i_item_sk 84 | AND ss_sold_date_sk = d_date_sk 85 | AND d_week_seq = (SELECT d_week_seq 86 | FROM date_dim 87 | WHERE d_year = 1999 AND d_moy = 12 AND d_dom = 11) 88 | GROUP BY i_brand_id, i_class_id, i_category_id 89 | HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales 90 | FROM avg_sales)) last_year 91 | WHERE this_year.i_brand_id = last_year.i_brand_id 92 | AND this_year.i_class_id = last_year.i_class_id 93 | AND this_year.i_category_id = last_year.i_category_id 94 | ORDER BY this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id 95 
| LIMIT 100 96 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q15.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ca_zip, 3 | sum(cs_sales_price) 4 | FROM catalog_sales, customer, customer_address, date_dim 5 | WHERE cs_bill_customer_sk = c_customer_sk 6 | AND c_current_addr_sk = ca_address_sk 7 | AND (substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', 8 | '85392', '85460', '80348', '81792') 9 | OR ca_state IN ('CA', 'WA', 'GA') 10 | OR cs_sales_price > 500) 11 | AND cs_sold_date_sk = d_date_sk 12 | AND d_qoy = 2 AND d_year = 2001 13 | GROUP BY ca_zip 14 | ORDER BY ca_zip 15 | LIMIT 100 16 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q16.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | count(DISTINCT cs_order_number) AS `order count `, 3 | sum(cs_ext_ship_cost) AS `total shipping cost `, 4 | sum(cs_net_profit) AS `total net profit ` 5 | FROM 6 | catalog_sales cs1, date_dim, customer_address, call_center 7 | WHERE 8 | d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days) 9 | AND cs1.cs_ship_date_sk = d_date_sk 10 | AND cs1.cs_ship_addr_sk = ca_address_sk 11 | AND ca_state = 'GA' 12 | AND cs1.cs_call_center_sk = cc_call_center_sk 13 | AND cc_county IN 14 | ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') 15 | AND EXISTS(SELECT * 16 | FROM catalog_sales cs2 17 | WHERE cs1.cs_order_number = cs2.cs_order_number 18 | AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) 19 | AND NOT EXISTS(SELECT * 20 | FROM catalog_returns cr1 21 | WHERE cs1.cs_order_number = cr1.cr_order_number) 22 | ORDER BY count(DISTINCT cs_order_number) 23 | LIMIT 100 24 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q17.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | i_item_desc, 4 | s_state, 5 | count(ss_quantity) AS store_sales_quantitycount, 6 | avg(ss_quantity) AS store_sales_quantityave, 7 | stddev_samp(ss_quantity) AS store_sales_quantitystdev, 8 | stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov, 9 | count(sr_return_quantity) as_store_returns_quantitycount, 10 | avg(sr_return_quantity) as_store_returns_quantityave, 11 | stddev_samp(sr_return_quantity) as_store_returns_quantitystdev, 12 | stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov, 13 | count(cs_quantity) AS catalog_sales_quantitycount, 14 | avg(cs_quantity) AS catalog_sales_quantityave, 15 | stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitystdev, 16 | stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov 17 | FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item 18 | WHERE d1.d_quarter_name = '2001Q1' 19 | AND d1.d_date_sk = ss_sold_date_sk 20 | AND i_item_sk = ss_item_sk 21 | AND s_store_sk = ss_store_sk 22 | AND ss_customer_sk = sr_customer_sk 23 | AND ss_item_sk = sr_item_sk 24 | AND ss_ticket_number = sr_ticket_number 25 | AND sr_returned_date_sk = d2.d_date_sk 26 | AND d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') 27 | AND sr_customer_sk = cs_bill_customer_sk 28 | AND sr_item_sk = cs_item_sk 29 | AND cs_sold_date_sk = d3.d_date_sk 30 | AND d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') 31 | GROUP BY i_item_id, i_item_desc, s_state 32 | ORDER BY i_item_id, i_item_desc, s_state 33 | LIMIT 100 34 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q18.sql: 
-------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | ca_country, 4 | ca_state, 5 | ca_county, 6 | avg(cast(cs_quantity AS DECIMAL(12, 2))) agg1, 7 | avg(cast(cs_list_price AS DECIMAL(12, 2))) agg2, 8 | avg(cast(cs_coupon_amt AS DECIMAL(12, 2))) agg3, 9 | avg(cast(cs_sales_price AS DECIMAL(12, 2))) agg4, 10 | avg(cast(cs_net_profit AS DECIMAL(12, 2))) agg5, 11 | avg(cast(c_birth_year AS DECIMAL(12, 2))) agg6, 12 | avg(cast(cd1.cd_dep_count AS DECIMAL(12, 2))) agg7 13 | FROM catalog_sales, customer_demographics cd1, 14 | customer_demographics cd2, customer, customer_address, date_dim, item 15 | WHERE cs_sold_date_sk = d_date_sk AND 16 | cs_item_sk = i_item_sk AND 17 | cs_bill_cdemo_sk = cd1.cd_demo_sk AND 18 | cs_bill_customer_sk = c_customer_sk AND 19 | cd1.cd_gender = 'F' AND 20 | cd1.cd_education_status = 'Unknown' AND 21 | c_current_cdemo_sk = cd2.cd_demo_sk AND 22 | c_current_addr_sk = ca_address_sk AND 23 | c_birth_month IN (1, 6, 8, 9, 12, 2) AND 24 | d_year = 1998 AND 25 | ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS') 26 | GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county) 27 | ORDER BY ca_country, ca_state, ca_county, i_item_id 28 | LIMIT 100 29 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q19.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_brand_id brand_id, 3 | i_brand brand, 4 | i_manufact_id, 5 | i_manufact, 6 | sum(ss_ext_sales_price) ext_price 7 | FROM date_dim, store_sales, item, customer, customer_address, store 8 | WHERE d_date_sk = ss_sold_date_sk 9 | AND ss_item_sk = i_item_sk 10 | AND i_manager_id = 8 11 | AND d_moy = 11 12 | AND d_year = 1998 13 | AND ss_customer_sk = c_customer_sk 14 | AND c_current_addr_sk = ca_address_sk 15 | AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) 16 | AND ss_store_sk = s_store_sk 17 | GROUP BY i_brand, 
i_brand_id, i_manufact_id, i_manufact 18 | ORDER BY ext_price DESC, brand, brand_id, i_manufact_id, i_manufact 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q2.sql: -------------------------------------------------------------------------------- 1 | WITH wscs AS 2 | ( SELECT 3 | sold_date_sk, 4 | sales_price 5 | FROM (SELECT 6 | ws_sold_date_sk sold_date_sk, 7 | ws_ext_sales_price sales_price 8 | FROM web_sales) x 9 | UNION ALL 10 | (SELECT 11 | cs_sold_date_sk sold_date_sk, 12 | cs_ext_sales_price sales_price 13 | FROM catalog_sales)), 14 | wswscs AS 15 | ( SELECT 16 | d_week_seq, 17 | sum(CASE WHEN (d_day_name = 'Sunday') 18 | THEN sales_price 19 | ELSE NULL END) 20 | sun_sales, 21 | sum(CASE WHEN (d_day_name = 'Monday') 22 | THEN sales_price 23 | ELSE NULL END) 24 | mon_sales, 25 | sum(CASE WHEN (d_day_name = 'Tuesday') 26 | THEN sales_price 27 | ELSE NULL END) 28 | tue_sales, 29 | sum(CASE WHEN (d_day_name = 'Wednesday') 30 | THEN sales_price 31 | ELSE NULL END) 32 | wed_sales, 33 | sum(CASE WHEN (d_day_name = 'Thursday') 34 | THEN sales_price 35 | ELSE NULL END) 36 | thu_sales, 37 | sum(CASE WHEN (d_day_name = 'Friday') 38 | THEN sales_price 39 | ELSE NULL END) 40 | fri_sales, 41 | sum(CASE WHEN (d_day_name = 'Saturday') 42 | THEN sales_price 43 | ELSE NULL END) 44 | sat_sales 45 | FROM wscs, date_dim 46 | WHERE d_date_sk = sold_date_sk 47 | GROUP BY d_week_seq) 48 | SELECT 49 | d_week_seq1, 50 | round(sun_sales1 / sun_sales2, 2), 51 | round(mon_sales1 / mon_sales2, 2), 52 | round(tue_sales1 / tue_sales2, 2), 53 | round(wed_sales1 / wed_sales2, 2), 54 | round(thu_sales1 / thu_sales2, 2), 55 | round(fri_sales1 / fri_sales2, 2), 56 | round(sat_sales1 / sat_sales2, 2) 57 | FROM 58 | (SELECT 59 | wswscs.d_week_seq d_week_seq1, 60 | sun_sales sun_sales1, 61 | mon_sales mon_sales1, 62 | tue_sales tue_sales1, 63 | wed_sales wed_sales1, 64 | thu_sales thu_sales1, 65 | 
fri_sales fri_sales1, 66 | sat_sales sat_sales1 67 | FROM wswscs, date_dim 68 | WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y, 69 | (SELECT 70 | wswscs.d_week_seq d_week_seq2, 71 | sun_sales sun_sales2, 72 | mon_sales mon_sales2, 73 | tue_sales tue_sales2, 74 | wed_sales wed_sales2, 75 | thu_sales thu_sales2, 76 | fri_sales fri_sales2, 77 | sat_sales sat_sales2 78 | FROM wswscs, date_dim 79 | WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z 80 | WHERE d_week_seq1 = d_week_seq2 - 53 81 | ORDER BY d_week_seq1 82 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q20.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_desc, 3 | i_category, 4 | i_class, 5 | i_current_price, 6 | sum(cs_ext_sales_price) AS itemrevenue, 7 | sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) 8 | OVER 9 | (PARTITION BY i_class) AS revenueratio 10 | FROM catalog_sales, item, date_dim 11 | WHERE cs_item_sk = i_item_sk 12 | AND i_category IN ('Sports', 'Books', 'Home') 13 | AND cs_sold_date_sk = d_date_sk 14 | AND d_date BETWEEN cast('1999-02-22' AS DATE) 15 | AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) 16 | GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price 17 | ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio 18 | LIMIT 100 19 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q21.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM ( 3 | SELECT 4 | w_warehouse_name, 5 | i_item_id, 6 | sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) 7 | THEN inv_quantity_on_hand 8 | ELSE 0 END) AS inv_before, 9 | sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) 10 | THEN inv_quantity_on_hand 11 | ELSE 0 END) AS 
inv_after 12 | FROM inventory, warehouse, item, date_dim 13 | WHERE i_current_price BETWEEN 0.99 AND 1.49 14 | AND i_item_sk = inv_item_sk 15 | AND inv_warehouse_sk = w_warehouse_sk 16 | AND inv_date_sk = d_date_sk 17 | AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) 18 | AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) 19 | GROUP BY w_warehouse_name, i_item_id) x 20 | WHERE (CASE WHEN inv_before > 0 21 | THEN inv_after / inv_before 22 | ELSE NULL 23 | END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0 24 | ORDER BY w_warehouse_name, i_item_id 25 | LIMIT 100 26 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q22.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_product_name, 3 | i_brand, 4 | i_class, 5 | i_category, 6 | avg(inv_quantity_on_hand) qoh 7 | FROM inventory, date_dim, item, warehouse 8 | WHERE inv_date_sk = d_date_sk 9 | AND inv_item_sk = i_item_sk 10 | AND inv_warehouse_sk = w_warehouse_sk 11 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 12 | GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category) 13 | ORDER BY qoh, i_product_name, i_brand, i_class, i_category 14 | LIMIT 100 15 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q23a.sql: -------------------------------------------------------------------------------- 1 | WITH frequent_ss_items AS 2 | (SELECT 3 | substr(i_item_desc, 1, 30) itemdesc, 4 | i_item_sk item_sk, 5 | d_date solddate, 6 | count(*) cnt 7 | FROM store_sales, date_dim, item 8 | WHERE ss_sold_date_sk = d_date_sk 9 | AND ss_item_sk = i_item_sk 10 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) 11 | GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date 12 | HAVING count(*) > 4), 13 | max_store_sales AS 14 | (SELECT max(csales) tpcds_cmax 15 | FROM (SELECT 16 | c_customer_sk, 17 | sum(ss_quantity * 
ss_sales_price) csales 18 | FROM store_sales, customer, date_dim 19 | WHERE ss_customer_sk = c_customer_sk 20 | AND ss_sold_date_sk = d_date_sk 21 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) 22 | GROUP BY c_customer_sk) x), 23 | best_ss_customer AS 24 | (SELECT 25 | c_customer_sk, 26 | sum(ss_quantity * ss_sales_price) ssales 27 | FROM store_sales, customer 28 | WHERE ss_customer_sk = c_customer_sk 29 | GROUP BY c_customer_sk 30 | HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * 31 | (SELECT * 32 | FROM max_store_sales)) 33 | SELECT sum(sales) 34 | FROM ((SELECT cs_quantity * cs_list_price sales 35 | FROM catalog_sales, date_dim 36 | WHERE d_year = 2000 37 | AND d_moy = 2 38 | AND cs_sold_date_sk = d_date_sk 39 | AND cs_item_sk IN (SELECT item_sk 40 | FROM frequent_ss_items) 41 | AND cs_bill_customer_sk IN (SELECT c_customer_sk 42 | FROM best_ss_customer)) 43 | UNION ALL 44 | (SELECT ws_quantity * ws_list_price sales 45 | FROM web_sales, date_dim 46 | WHERE d_year = 2000 47 | AND d_moy = 2 48 | AND ws_sold_date_sk = d_date_sk 49 | AND ws_item_sk IN (SELECT item_sk 50 | FROM frequent_ss_items) 51 | AND ws_bill_customer_sk IN (SELECT c_customer_sk 52 | FROM best_ss_customer))) y 53 | LIMIT 100 54 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q23b.sql: -------------------------------------------------------------------------------- 1 | WITH frequent_ss_items AS 2 | (SELECT 3 | substr(i_item_desc, 1, 30) itemdesc, 4 | i_item_sk item_sk, 5 | d_date solddate, 6 | count(*) cnt 7 | FROM store_sales, date_dim, item 8 | WHERE ss_sold_date_sk = d_date_sk 9 | AND ss_item_sk = i_item_sk 10 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) 11 | GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date 12 | HAVING count(*) > 4), 13 | max_store_sales AS 14 | (SELECT max(csales) tpcds_cmax 15 | FROM (SELECT 16 | c_customer_sk, 17 | sum(ss_quantity * ss_sales_price) csales 18 | FROM 
store_sales, customer, date_dim 19 | WHERE ss_customer_sk = c_customer_sk 20 | AND ss_sold_date_sk = d_date_sk 21 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) 22 | GROUP BY c_customer_sk) x), 23 | best_ss_customer AS 24 | (SELECT 25 | c_customer_sk, 26 | sum(ss_quantity * ss_sales_price) ssales 27 | FROM store_sales 28 | , customer 29 | WHERE ss_customer_sk = c_customer_sk 30 | GROUP BY c_customer_sk 31 | HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * 32 | (SELECT * 33 | FROM max_store_sales)) 34 | SELECT 35 | c_last_name, 36 | c_first_name, 37 | sales 38 | FROM ((SELECT 39 | c_last_name, 40 | c_first_name, 41 | sum(cs_quantity * cs_list_price) sales 42 | FROM catalog_sales, customer, date_dim 43 | WHERE d_year = 2000 44 | AND d_moy = 2 45 | AND cs_sold_date_sk = d_date_sk 46 | AND cs_item_sk IN (SELECT item_sk 47 | FROM frequent_ss_items) 48 | AND cs_bill_customer_sk IN (SELECT c_customer_sk 49 | FROM best_ss_customer) 50 | AND cs_bill_customer_sk = c_customer_sk 51 | GROUP BY c_last_name, c_first_name) 52 | UNION ALL 53 | (SELECT 54 | c_last_name, 55 | c_first_name, 56 | sum(ws_quantity * ws_list_price) sales 57 | FROM web_sales, customer, date_dim 58 | WHERE d_year = 2000 59 | AND d_moy = 2 60 | AND ws_sold_date_sk = d_date_sk 61 | AND ws_item_sk IN (SELECT item_sk 62 | FROM frequent_ss_items) 63 | AND ws_bill_customer_sk IN (SELECT c_customer_sk 64 | FROM best_ss_customer) 65 | AND ws_bill_customer_sk = c_customer_sk 66 | GROUP BY c_last_name, c_first_name)) y 67 | ORDER BY c_last_name, c_first_name, sales 68 | LIMIT 100 69 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q24a.sql: -------------------------------------------------------------------------------- 1 | WITH ssales AS 2 | (SELECT 3 | c_last_name, 4 | c_first_name, 5 | s_store_name, 6 | ca_state, 7 | s_state, 8 | i_color, 9 | i_current_price, 10 | i_manager_id, 11 | i_units, 12 | i_size, 13 | 
sum(ss_net_paid) netpaid 14 | FROM store_sales, store_returns, store, item, customer, customer_address 15 | WHERE ss_ticket_number = sr_ticket_number 16 | AND ss_item_sk = sr_item_sk 17 | AND ss_customer_sk = c_customer_sk 18 | AND ss_item_sk = i_item_sk 19 | AND ss_store_sk = s_store_sk 20 | AND c_birth_country = upper(ca_country) 21 | AND s_zip = ca_zip 22 | AND s_market_id = 8 23 | GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, 24 | i_current_price, i_manager_id, i_units, i_size) 25 | SELECT 26 | c_last_name, 27 | c_first_name, 28 | s_store_name, 29 | sum(netpaid) paid 30 | FROM ssales 31 | WHERE i_color = 'pale' 32 | GROUP BY c_last_name, c_first_name, s_store_name 33 | HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) 34 | FROM ssales) 35 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q24b.sql: -------------------------------------------------------------------------------- 1 | WITH ssales AS 2 | (SELECT 3 | c_last_name, 4 | c_first_name, 5 | s_store_name, 6 | ca_state, 7 | s_state, 8 | i_color, 9 | i_current_price, 10 | i_manager_id, 11 | i_units, 12 | i_size, 13 | sum(ss_net_paid) netpaid 14 | FROM store_sales, store_returns, store, item, customer, customer_address 15 | WHERE ss_ticket_number = sr_ticket_number 16 | AND ss_item_sk = sr_item_sk 17 | AND ss_customer_sk = c_customer_sk 18 | AND ss_item_sk = i_item_sk 19 | AND ss_store_sk = s_store_sk 20 | AND c_birth_country = upper(ca_country) 21 | AND s_zip = ca_zip 22 | AND s_market_id = 8 23 | GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, 24 | i_color, i_current_price, i_manager_id, i_units, i_size) 25 | SELECT 26 | c_last_name, 27 | c_first_name, 28 | s_store_name, 29 | sum(netpaid) paid 30 | FROM ssales 31 | WHERE i_color = 'chiffon' 32 | GROUP BY c_last_name, c_first_name, s_store_name 33 | HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) 34 | FROM ssales) 35 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q25.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | i_item_desc, 4 | s_store_id, 5 | s_store_name, 6 | sum(ss_net_profit) AS store_sales_profit, 7 | sum(sr_net_loss) AS store_returns_loss, 8 | sum(cs_net_profit) AS catalog_sales_profit 9 | FROM 10 | store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, 11 | store, item 12 | WHERE 13 | d1.d_moy = 4 14 | AND d1.d_year = 2001 15 | AND d1.d_date_sk = ss_sold_date_sk 16 | AND i_item_sk = ss_item_sk 17 | AND s_store_sk = ss_store_sk 18 | AND ss_customer_sk = sr_customer_sk 19 | AND ss_item_sk = sr_item_sk 20 | AND ss_ticket_number = sr_ticket_number 21 | AND sr_returned_date_sk = d2.d_date_sk 22 | AND d2.d_moy BETWEEN 4 AND 10 23 | AND d2.d_year = 2001 24 | AND sr_customer_sk = cs_bill_customer_sk 25 | AND sr_item_sk = cs_item_sk 26 | AND cs_sold_date_sk = d3.d_date_sk 27 | AND d3.d_moy BETWEEN 4 AND 10 28 | AND d3.d_year = 2001 29 | GROUP BY 30 | i_item_id, i_item_desc, s_store_id, s_store_name 31 | ORDER BY 32 | i_item_id, i_item_desc, s_store_id, s_store_name 33 | LIMIT 100 -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q26.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | avg(cs_quantity) agg1, 4 | avg(cs_list_price) agg2, 5 | avg(cs_coupon_amt) agg3, 6 | avg(cs_sales_price) agg4 7 | FROM catalog_sales, customer_demographics, date_dim, item, promotion 8 | WHERE cs_sold_date_sk = d_date_sk AND 9 | cs_item_sk = i_item_sk AND 10 | cs_bill_cdemo_sk = cd_demo_sk AND 11 | cs_promo_sk = p_promo_sk AND 12 | cd_gender = 'M' AND 13 | cd_marital_status = 'S' AND 14 | cd_education_status = 'College' AND 15 | (p_channel_email = 'N' OR p_channel_event = 'N') AND 16 | 
d_year = 2000 17 | GROUP BY i_item_id 18 | ORDER BY i_item_id 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q27.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | s_state, 4 | grouping(s_state) g_state, 5 | avg(ss_quantity) agg1, 6 | avg(ss_list_price) agg2, 7 | avg(ss_coupon_amt) agg3, 8 | avg(ss_sales_price) agg4 9 | FROM store_sales, customer_demographics, date_dim, store, item 10 | WHERE ss_sold_date_sk = d_date_sk AND 11 | ss_item_sk = i_item_sk AND 12 | ss_store_sk = s_store_sk AND 13 | ss_cdemo_sk = cd_demo_sk AND 14 | cd_gender = 'M' AND 15 | cd_marital_status = 'S' AND 16 | cd_education_status = 'College' AND 17 | d_year = 2002 AND 18 | s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') 19 | GROUP BY ROLLUP (i_item_id, s_state) 20 | ORDER BY i_item_id, s_state 21 | LIMIT 100 22 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q28.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM (SELECT 3 | avg(ss_list_price) B1_LP, 4 | count(ss_list_price) B1_CNT, 5 | count(DISTINCT ss_list_price) B1_CNTD 6 | FROM store_sales 7 | WHERE ss_quantity BETWEEN 0 AND 5 8 | AND (ss_list_price BETWEEN 8 AND 8 + 10 9 | OR ss_coupon_amt BETWEEN 459 AND 459 + 1000 10 | OR ss_wholesale_cost BETWEEN 57 AND 57 + 20)) B1, 11 | (SELECT 12 | avg(ss_list_price) B2_LP, 13 | count(ss_list_price) B2_CNT, 14 | count(DISTINCT ss_list_price) B2_CNTD 15 | FROM store_sales 16 | WHERE ss_quantity BETWEEN 6 AND 10 17 | AND (ss_list_price BETWEEN 90 AND 90 + 10 18 | OR ss_coupon_amt BETWEEN 2323 AND 2323 + 1000 19 | OR ss_wholesale_cost BETWEEN 31 AND 31 + 20)) B2, 20 | (SELECT 21 | avg(ss_list_price) B3_LP, 22 | count(ss_list_price) B3_CNT, 23 | count(DISTINCT ss_list_price) B3_CNTD 24 | FROM store_sales 25 
| WHERE ss_quantity BETWEEN 11 AND 15 26 | AND (ss_list_price BETWEEN 142 AND 142 + 10 27 | OR ss_coupon_amt BETWEEN 12214 AND 12214 + 1000 28 | OR ss_wholesale_cost BETWEEN 79 AND 79 + 20)) B3, 29 | (SELECT 30 | avg(ss_list_price) B4_LP, 31 | count(ss_list_price) B4_CNT, 32 | count(DISTINCT ss_list_price) B4_CNTD 33 | FROM store_sales 34 | WHERE ss_quantity BETWEEN 16 AND 20 35 | AND (ss_list_price BETWEEN 135 AND 135 + 10 36 | OR ss_coupon_amt BETWEEN 6071 AND 6071 + 1000 37 | OR ss_wholesale_cost BETWEEN 38 AND 38 + 20)) B4, 38 | (SELECT 39 | avg(ss_list_price) B5_LP, 40 | count(ss_list_price) B5_CNT, 41 | count(DISTINCT ss_list_price) B5_CNTD 42 | FROM store_sales 43 | WHERE ss_quantity BETWEEN 21 AND 25 44 | AND (ss_list_price BETWEEN 122 AND 122 + 10 45 | OR ss_coupon_amt BETWEEN 836 AND 836 + 1000 46 | OR ss_wholesale_cost BETWEEN 17 AND 17 + 20)) B5, 47 | (SELECT 48 | avg(ss_list_price) B6_LP, 49 | count(ss_list_price) B6_CNT, 50 | count(DISTINCT ss_list_price) B6_CNTD 51 | FROM store_sales 52 | WHERE ss_quantity BETWEEN 26 AND 30 53 | AND (ss_list_price BETWEEN 154 AND 154 + 10 54 | OR ss_coupon_amt BETWEEN 7326 AND 7326 + 1000 55 | OR ss_wholesale_cost BETWEEN 7 AND 7 + 20)) B6 56 | LIMIT 100 57 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q29.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | i_item_desc, 4 | s_store_id, 5 | s_store_name, 6 | sum(ss_quantity) AS store_sales_quantity, 7 | sum(sr_return_quantity) AS store_returns_quantity, 8 | sum(cs_quantity) AS catalog_sales_quantity 9 | FROM 10 | store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, 11 | date_dim d3, store, item 12 | WHERE 13 | d1.d_moy = 9 14 | AND d1.d_year = 1999 15 | AND d1.d_date_sk = ss_sold_date_sk 16 | AND i_item_sk = ss_item_sk 17 | AND s_store_sk = ss_store_sk 18 | AND ss_customer_sk = sr_customer_sk 19 | AND 
ss_item_sk = sr_item_sk 20 | AND ss_ticket_number = sr_ticket_number 21 | AND sr_returned_date_sk = d2.d_date_sk 22 | AND d2.d_moy BETWEEN 9 AND 9 + 3 23 | AND d2.d_year = 1999 24 | AND sr_customer_sk = cs_bill_customer_sk 25 | AND sr_item_sk = cs_item_sk 26 | AND cs_sold_date_sk = d3.d_date_sk 27 | AND d3.d_year IN (1999, 1999 + 1, 1999 + 2) 28 | GROUP BY 29 | i_item_id, i_item_desc, s_store_id, s_store_name 30 | ORDER BY 31 | i_item_id, i_item_desc, s_store_id, s_store_name 32 | LIMIT 100 33 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_brand_id brand_id, 4 | item.i_brand brand, 5 | SUM(ss_ext_sales_price) sum_agg 6 | FROM date_dim dt, store_sales, item 7 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk 8 | AND store_sales.ss_item_sk = item.i_item_sk 9 | AND item.i_manufact_id = 128 10 | AND dt.d_moy = 11 11 | GROUP BY dt.d_year, item.i_brand, item.i_brand_id 12 | ORDER BY dt.d_year, sum_agg DESC, brand_id 13 | LIMIT 100 14 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q30.sql: -------------------------------------------------------------------------------- 1 | WITH customer_total_return AS 2 | (SELECT 3 | wr_returning_customer_sk AS ctr_customer_sk, 4 | ca_state AS ctr_state, 5 | sum(wr_return_amt) AS ctr_total_return 6 | FROM web_returns, date_dim, customer_address 7 | WHERE wr_returned_date_sk = d_date_sk 8 | AND d_year = 2002 9 | AND wr_returning_addr_sk = ca_address_sk 10 | GROUP BY wr_returning_customer_sk, ca_state) 11 | SELECT 12 | c_customer_id, 13 | c_salutation, 14 | c_first_name, 15 | c_last_name, 16 | c_preferred_cust_flag, 17 | c_birth_day, 18 | c_birth_month, 19 | c_birth_year, 20 | c_birth_country, 21 | c_login, 22 | c_email_address, 23 | c_last_review_date, 24 | 
ctr_total_return 25 | FROM customer_total_return ctr1, customer_address, customer 26 | WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 27 | FROM customer_total_return ctr2 28 | WHERE ctr1.ctr_state = ctr2.ctr_state) 29 | AND ca_address_sk = c_current_addr_sk 30 | AND ca_state = 'GA' 31 | AND ctr1.ctr_customer_sk = c_customer_sk 32 | ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag 33 | , c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address 34 | , c_last_review_date, ctr_total_return 35 | LIMIT 100 36 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q31.sql: -------------------------------------------------------------------------------- 1 | WITH ss AS 2 | (SELECT 3 | ca_county, 4 | d_qoy, 5 | d_year, 6 | sum(ss_ext_sales_price) AS store_sales 7 | FROM store_sales, date_dim, customer_address 8 | WHERE ss_sold_date_sk = d_date_sk 9 | AND ss_addr_sk = ca_address_sk 10 | GROUP BY ca_county, d_qoy, d_year), 11 | ws AS 12 | (SELECT 13 | ca_county, 14 | d_qoy, 15 | d_year, 16 | sum(ws_ext_sales_price) AS web_sales 17 | FROM web_sales, date_dim, customer_address 18 | WHERE ws_sold_date_sk = d_date_sk 19 | AND ws_bill_addr_sk = ca_address_sk 20 | GROUP BY ca_county, d_qoy, d_year) 21 | SELECT 22 | ss1.ca_county, 23 | ss1.d_year, 24 | ws2.web_sales / ws1.web_sales web_q1_q2_increase, 25 | ss2.store_sales / ss1.store_sales store_q1_q2_increase, 26 | ws3.web_sales / ws2.web_sales web_q2_q3_increase, 27 | ss3.store_sales / ss2.store_sales store_q2_q3_increase 28 | FROM 29 | ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 30 | WHERE 31 | ss1.d_qoy = 1 32 | AND ss1.d_year = 2000 33 | AND ss1.ca_county = ss2.ca_county 34 | AND ss2.d_qoy = 2 35 | AND ss2.d_year = 2000 36 | AND ss2.ca_county = ss3.ca_county 37 | AND ss3.d_qoy = 3 38 | AND ss3.d_year = 2000 39 | AND ss1.ca_county = ws1.ca_county 40 | AND ws1.d_qoy = 1 41 | 
AND ws1.d_year = 2000 42 | AND ws1.ca_county = ws2.ca_county 43 | AND ws2.d_qoy = 2 44 | AND ws2.d_year = 2000 45 | AND ws1.ca_county = ws3.ca_county 46 | AND ws3.d_qoy = 3 47 | AND ws3.d_year = 2000 48 | AND CASE WHEN ws1.web_sales > 0 49 | THEN ws2.web_sales / ws1.web_sales 50 | ELSE NULL END 51 | > CASE WHEN ss1.store_sales > 0 52 | THEN ss2.store_sales / ss1.store_sales 53 | ELSE NULL END 54 | AND CASE WHEN ws2.web_sales > 0 55 | THEN ws3.web_sales / ws2.web_sales 56 | ELSE NULL END 57 | > CASE WHEN ss2.store_sales > 0 58 | THEN ss3.store_sales / ss2.store_sales 59 | ELSE NULL END 60 | ORDER BY ss1.ca_county 61 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q32.sql: -------------------------------------------------------------------------------- 1 | SELECT 1 AS `excess discount amount ` 2 | FROM 3 | catalog_sales, item, date_dim 4 | WHERE 5 | i_manufact_id = 977 6 | AND i_item_sk = cs_item_sk 7 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) 8 | AND d_date_sk = cs_sold_date_sk 9 | AND cs_ext_discount_amt > ( 10 | SELECT 1.3 * avg(cs_ext_discount_amt) 11 | FROM catalog_sales, date_dim 12 | WHERE cs_item_sk = i_item_sk 13 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) 14 | AND d_date_sk = cs_sold_date_sk) 15 | LIMIT 100 16 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q33.sql: -------------------------------------------------------------------------------- 1 | WITH ss AS ( 2 | SELECT 3 | i_manufact_id, 4 | sum(ss_ext_sales_price) total_sales 5 | FROM 6 | store_sales, date_dim, customer_address, item 7 | WHERE 8 | i_manufact_id IN (SELECT i_manufact_id 9 | FROM item 10 | WHERE i_category IN ('Electronics')) 11 | AND ss_item_sk = i_item_sk 12 | AND ss_sold_date_sk = d_date_sk 13 | AND d_year = 1998 14 | AND d_moy = 5 15 | AND 
ss_addr_sk = ca_address_sk 16 | AND ca_gmt_offset = -5 17 | GROUP BY i_manufact_id), cs AS 18 | (SELECT 19 | i_manufact_id, 20 | sum(cs_ext_sales_price) total_sales 21 | FROM catalog_sales, date_dim, customer_address, item 22 | WHERE 23 | i_manufact_id IN ( 24 | SELECT i_manufact_id 25 | FROM item 26 | WHERE 27 | i_category IN ('Electronics')) 28 | AND cs_item_sk = i_item_sk 29 | AND cs_sold_date_sk = d_date_sk 30 | AND d_year = 1998 31 | AND d_moy = 5 32 | AND cs_bill_addr_sk = ca_address_sk 33 | AND ca_gmt_offset = -5 34 | GROUP BY i_manufact_id), 35 | ws AS ( 36 | SELECT 37 | i_manufact_id, 38 | sum(ws_ext_sales_price) total_sales 39 | FROM 40 | web_sales, date_dim, customer_address, item 41 | WHERE 42 | i_manufact_id IN (SELECT i_manufact_id 43 | FROM item 44 | WHERE i_category IN ('Electronics')) 45 | AND ws_item_sk = i_item_sk 46 | AND ws_sold_date_sk = d_date_sk 47 | AND d_year = 1998 48 | AND d_moy = 5 49 | AND ws_bill_addr_sk = ca_address_sk 50 | AND ca_gmt_offset = -5 51 | GROUP BY i_manufact_id) 52 | SELECT 53 | i_manufact_id, 54 | sum(total_sales) total_sales 55 | FROM (SELECT * 56 | FROM ss 57 | UNION ALL 58 | SELECT * 59 | FROM cs 60 | UNION ALL 61 | SELECT * 62 | FROM ws) tmp1 63 | GROUP BY i_manufact_id 64 | ORDER BY total_sales 65 | LIMIT 100 66 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q34.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | c_salutation, 5 | c_preferred_cust_flag, 6 | ss_ticket_number, 7 | cnt 8 | FROM 9 | (SELECT 10 | ss_ticket_number, 11 | ss_customer_sk, 12 | count(*) cnt 13 | FROM store_sales, date_dim, store, household_demographics 14 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 15 | AND store_sales.ss_store_sk = store.s_store_sk 16 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 17 | AND (date_dim.d_dom BETWEEN 1 AND 3 OR 
date_dim.d_dom BETWEEN 25 AND 28) 18 | AND (household_demographics.hd_buy_potential = '>10000' OR 19 | household_demographics.hd_buy_potential = 'unknown') 20 | AND household_demographics.hd_vehicle_count > 0 21 | AND (CASE WHEN household_demographics.hd_vehicle_count > 0 22 | THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count 23 | ELSE NULL 24 | END) > 1.2 25 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 26 | AND store.s_county IN 27 | ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 28 | 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') 29 | GROUP BY ss_ticket_number, ss_customer_sk) dn, customer 30 | WHERE ss_customer_sk = c_customer_sk 31 | AND cnt BETWEEN 15 AND 20 32 | ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC 33 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q35.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ca_state, 3 | cd_gender, 4 | cd_marital_status, 5 | count(*) cnt1, 6 | min(cd_dep_count), 7 | max(cd_dep_count), 8 | avg(cd_dep_count), 9 | cd_dep_employed_count, 10 | count(*) cnt2, 11 | min(cd_dep_employed_count), 12 | max(cd_dep_employed_count), 13 | avg(cd_dep_employed_count), 14 | cd_dep_college_count, 15 | count(*) cnt3, 16 | min(cd_dep_college_count), 17 | max(cd_dep_college_count), 18 | avg(cd_dep_college_count) 19 | FROM 20 | customer c, customer_address ca, customer_demographics 21 | WHERE 22 | c.c_current_addr_sk = ca.ca_address_sk AND 23 | cd_demo_sk = c.c_current_cdemo_sk AND 24 | exists(SELECT * 25 | FROM store_sales, date_dim 26 | WHERE c.c_customer_sk = ss_customer_sk AND 27 | ss_sold_date_sk = d_date_sk AND 28 | d_year = 2002 AND 29 | d_qoy < 4) AND 30 | (exists(SELECT * 31 | FROM web_sales, date_dim 32 | WHERE c.c_customer_sk = ws_bill_customer_sk AND 33 | 
ws_sold_date_sk = d_date_sk AND 34 | d_year = 2002 AND 35 | d_qoy < 4) OR 36 | exists(SELECT * 37 | FROM catalog_sales, date_dim 38 | WHERE c.c_customer_sk = cs_ship_customer_sk AND 39 | cs_sold_date_sk = d_date_sk AND 40 | d_year = 2002 AND 41 | d_qoy < 4)) 42 | GROUP BY ca_state, cd_gender, cd_marital_status, cd_dep_count, 43 | cd_dep_employed_count, cd_dep_college_count 44 | ORDER BY ca_state, cd_gender, cd_marital_status, cd_dep_count, 45 | cd_dep_employed_count, cd_dep_college_count 46 | LIMIT 100 47 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q36.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin, 3 | i_category, 4 | i_class, 5 | grouping(i_category) + grouping(i_class) AS lochierarchy, 6 | rank() 7 | OVER ( 8 | PARTITION BY grouping(i_category) + grouping(i_class), 9 | CASE WHEN grouping(i_class) = 0 10 | THEN i_category END 11 | ORDER BY sum(ss_net_profit) / sum(ss_ext_sales_price) ASC) AS rank_within_parent 12 | FROM 13 | store_sales, date_dim d1, item, store 14 | WHERE 15 | d1.d_year = 2001 16 | AND d1.d_date_sk = ss_sold_date_sk 17 | AND i_item_sk = ss_item_sk 18 | AND s_store_sk = ss_store_sk 19 | AND s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN') 20 | GROUP BY ROLLUP (i_category, i_class) 21 | ORDER BY 22 | lochierarchy DESC 23 | , CASE WHEN lochierarchy = 0 24 | THEN i_category END 25 | , rank_within_parent 26 | LIMIT 100 27 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q37.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | i_item_desc, 4 | i_current_price 5 | FROM item, inventory, date_dim, catalog_sales 6 | WHERE i_current_price BETWEEN 68 AND 68 + 30 7 | AND inv_item_sk = i_item_sk 8 | AND d_date_sk = 
inv_date_sk 9 | AND d_date BETWEEN cast('2000-02-01' AS DATE) AND (cast('2000-02-01' AS DATE) + INTERVAL 60 days) 10 | AND i_manufact_id IN (677, 940, 694, 808) 11 | AND inv_quantity_on_hand BETWEEN 100 AND 500 12 | AND cs_item_sk = i_item_sk 13 | GROUP BY i_item_id, i_item_desc, i_current_price 14 | ORDER BY i_item_id 15 | LIMIT 100 16 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q38.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) 2 | FROM ( 3 | SELECT DISTINCT 4 | c_last_name, 5 | c_first_name, 6 | d_date 7 | FROM store_sales, date_dim, customer 8 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 9 | AND store_sales.ss_customer_sk = customer.c_customer_sk 10 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 11 | INTERSECT 12 | SELECT DISTINCT 13 | c_last_name, 14 | c_first_name, 15 | d_date 16 | FROM catalog_sales, date_dim, customer 17 | WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 18 | AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 19 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 20 | INTERSECT 21 | SELECT DISTINCT 22 | c_last_name, 23 | c_first_name, 24 | d_date 25 | FROM web_sales, date_dim, customer 26 | WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk 27 | AND web_sales.ws_bill_customer_sk = customer.c_customer_sk 28 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 29 | ) hot_cust 30 | LIMIT 100 31 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q39a.sql: -------------------------------------------------------------------------------- 1 | WITH inv AS 2 | (SELECT 3 | w_warehouse_name, 4 | w_warehouse_sk, 5 | i_item_sk, 6 | d_moy, 7 | stdev, 8 | mean, 9 | CASE mean 10 | WHEN 0 11 | THEN NULL 12 | ELSE stdev / mean END cov 13 | FROM (SELECT 14 | w_warehouse_name, 15 | w_warehouse_sk, 16 | i_item_sk, 17 | d_moy, 18 | 
stddev_samp(inv_quantity_on_hand) stdev, 19 | avg(inv_quantity_on_hand) mean 20 | FROM inventory, item, warehouse, date_dim 21 | WHERE inv_item_sk = i_item_sk 22 | AND inv_warehouse_sk = w_warehouse_sk 23 | AND inv_date_sk = d_date_sk 24 | AND d_year = 2001 25 | GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo 26 | WHERE CASE mean 27 | WHEN 0 28 | THEN 0 29 | ELSE stdev / mean END > 1) 30 | SELECT 31 | inv1.w_warehouse_sk, 32 | inv1.i_item_sk, 33 | inv1.d_moy, 34 | inv1.mean, 35 | inv1.cov, 36 | inv2.w_warehouse_sk, 37 | inv2.i_item_sk, 38 | inv2.d_moy, 39 | inv2.mean, 40 | inv2.cov 41 | FROM inv inv1, inv inv2 42 | WHERE inv1.i_item_sk = inv2.i_item_sk 43 | AND inv1.w_warehouse_sk = inv2.w_warehouse_sk 44 | AND inv1.d_moy = 1 45 | AND inv2.d_moy = 1 + 1 46 | ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov 47 | , inv2.d_moy, inv2.mean, inv2.cov 48 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q39b.sql: -------------------------------------------------------------------------------- 1 | WITH inv AS 2 | (SELECT 3 | w_warehouse_name, 4 | w_warehouse_sk, 5 | i_item_sk, 6 | d_moy, 7 | stdev, 8 | mean, 9 | CASE mean 10 | WHEN 0 11 | THEN NULL 12 | ELSE stdev / mean END cov 13 | FROM (SELECT 14 | w_warehouse_name, 15 | w_warehouse_sk, 16 | i_item_sk, 17 | d_moy, 18 | stddev_samp(inv_quantity_on_hand) stdev, 19 | avg(inv_quantity_on_hand) mean 20 | FROM inventory, item, warehouse, date_dim 21 | WHERE inv_item_sk = i_item_sk 22 | AND inv_warehouse_sk = w_warehouse_sk 23 | AND inv_date_sk = d_date_sk 24 | AND d_year = 2001 25 | GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo 26 | WHERE CASE mean 27 | WHEN 0 28 | THEN 0 29 | ELSE stdev / mean END > 1) 30 | SELECT 31 | inv1.w_warehouse_sk, 32 | inv1.i_item_sk, 33 | inv1.d_moy, 34 | inv1.mean, 35 | inv1.cov, 36 | inv2.w_warehouse_sk, 37 | inv2.i_item_sk, 38 | inv2.d_moy, 39 | inv2.mean, 
40 | inv2.cov 41 | FROM inv inv1, inv inv2 42 | WHERE inv1.i_item_sk = inv2.i_item_sk 43 | AND inv1.w_warehouse_sk = inv2.w_warehouse_sk 44 | AND inv1.d_moy = 1 45 | AND inv2.d_moy = 1 + 1 46 | AND inv1.cov > 1.5 47 | ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov 48 | , inv2.d_moy, inv2.mean, inv2.cov 49 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q40.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | w_state, 3 | i_item_id, 4 | sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) 5 | THEN cs_sales_price - coalesce(cr_refunded_cash, 0) 6 | ELSE 0 END) AS sales_before, 7 | sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) 8 | THEN cs_sales_price - coalesce(cr_refunded_cash, 0) 9 | ELSE 0 END) AS sales_after 10 | FROM 11 | catalog_sales 12 | LEFT OUTER JOIN catalog_returns ON 13 | (cs_order_number = cr_order_number 14 | AND cs_item_sk = cr_item_sk) 15 | , warehouse, item, date_dim 16 | WHERE 17 | i_current_price BETWEEN 0.99 AND 1.49 18 | AND i_item_sk = cs_item_sk 19 | AND cs_warehouse_sk = w_warehouse_sk 20 | AND cs_sold_date_sk = d_date_sk 21 | AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) 22 | AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) 23 | GROUP BY w_state, i_item_id 24 | ORDER BY w_state, i_item_id 25 | LIMIT 100 26 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q41.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT (i_product_name) 2 | FROM item i1 3 | WHERE i_manufact_id BETWEEN 738 AND 738 + 40 4 | AND (SELECT count(*) AS item_cnt 5 | FROM item 6 | WHERE (i_manufact = i1.i_manufact AND 7 | ((i_category = 'Women' AND 8 | (i_color = 'powder' OR i_color = 'khaki') AND 9 | (i_units = 'Ounce' OR i_units = 
'Oz') AND 10 | (i_size = 'medium' OR i_size = 'extra large') 11 | ) OR 12 | (i_category = 'Women' AND 13 | (i_color = 'brown' OR i_color = 'honeydew') AND 14 | (i_units = 'Bunch' OR i_units = 'Ton') AND 15 | (i_size = 'N/A' OR i_size = 'small') 16 | ) OR 17 | (i_category = 'Men' AND 18 | (i_color = 'floral' OR i_color = 'deep') AND 19 | (i_units = 'N/A' OR i_units = 'Dozen') AND 20 | (i_size = 'petite' OR i_size = 'large') 21 | ) OR 22 | (i_category = 'Men' AND 23 | (i_color = 'light' OR i_color = 'cornflower') AND 24 | (i_units = 'Box' OR i_units = 'Pound') AND 25 | (i_size = 'medium' OR i_size = 'extra large') 26 | ))) OR 27 | (i_manufact = i1.i_manufact AND 28 | ((i_category = 'Women' AND 29 | (i_color = 'midnight' OR i_color = 'snow') AND 30 | (i_units = 'Pallet' OR i_units = 'Gross') AND 31 | (i_size = 'medium' OR i_size = 'extra large') 32 | ) OR 33 | (i_category = 'Women' AND 34 | (i_color = 'cyan' OR i_color = 'papaya') AND 35 | (i_units = 'Cup' OR i_units = 'Dram') AND 36 | (i_size = 'N/A' OR i_size = 'small') 37 | ) OR 38 | (i_category = 'Men' AND 39 | (i_color = 'orange' OR i_color = 'frosted') AND 40 | (i_units = 'Each' OR i_units = 'Tbl') AND 41 | (i_size = 'petite' OR i_size = 'large') 42 | ) OR 43 | (i_category = 'Men' AND 44 | (i_color = 'forest' OR i_color = 'ghost') AND 45 | (i_units = 'Lb' OR i_units = 'Bundle') AND 46 | (i_size = 'medium' OR i_size = 'extra large') 47 | )))) > 0 48 | ORDER BY i_product_name 49 | LIMIT 100 50 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q42.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_category_id, 4 | item.i_category, 5 | sum(ss_ext_sales_price) 6 | FROM date_dim dt, store_sales, item 7 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk 8 | AND store_sales.ss_item_sk = item.i_item_sk 9 | AND item.i_manager_id = 1 10 | AND dt.d_moy = 11 11 | AND dt.d_year = 
2000 12 | GROUP BY dt.d_year 13 | , item.i_category_id 14 | , item.i_category 15 | ORDER BY sum(ss_ext_sales_price) DESC, dt.d_year 16 | , item.i_category_id 17 | , item.i_category 18 | LIMIT 100 19 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q43.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | s_store_name, 3 | s_store_id, 4 | sum(CASE WHEN (d_day_name = 'Sunday') 5 | THEN ss_sales_price 6 | ELSE NULL END) sun_sales, 7 | sum(CASE WHEN (d_day_name = 'Monday') 8 | THEN ss_sales_price 9 | ELSE NULL END) mon_sales, 10 | sum(CASE WHEN (d_day_name = 'Tuesday') 11 | THEN ss_sales_price 12 | ELSE NULL END) tue_sales, 13 | sum(CASE WHEN (d_day_name = 'Wednesday') 14 | THEN ss_sales_price 15 | ELSE NULL END) wed_sales, 16 | sum(CASE WHEN (d_day_name = 'Thursday') 17 | THEN ss_sales_price 18 | ELSE NULL END) thu_sales, 19 | sum(CASE WHEN (d_day_name = 'Friday') 20 | THEN ss_sales_price 21 | ELSE NULL END) fri_sales, 22 | sum(CASE WHEN (d_day_name = 'Saturday') 23 | THEN ss_sales_price 24 | ELSE NULL END) sat_sales 25 | FROM date_dim, store_sales, store 26 | WHERE d_date_sk = ss_sold_date_sk AND 27 | s_store_sk = ss_store_sk AND 28 | s_gmt_offset = -5 AND 29 | d_year = 2000 30 | GROUP BY s_store_name, s_store_id 31 | ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, 32 | thu_sales, fri_sales, sat_sales 33 | LIMIT 100 34 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q44.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | asceding.rnk, 3 | i1.i_product_name best_performing, 4 | i2.i_product_name worst_performing 5 | FROM (SELECT * 6 | FROM (SELECT 7 | item_sk, 8 | rank() 9 | OVER ( 10 | ORDER BY rank_col ASC) rnk 11 | FROM (SELECT 12 | ss_item_sk item_sk, 13 | avg(ss_net_profit) rank_col 
14 | FROM store_sales ss1 15 | WHERE ss_store_sk = 4 16 | GROUP BY ss_item_sk 17 | HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col 18 | FROM store_sales 19 | WHERE ss_store_sk = 4 20 | AND ss_addr_sk IS NULL 21 | GROUP BY ss_store_sk)) V1) V11 22 | WHERE rnk < 11) asceding, 23 | (SELECT * 24 | FROM (SELECT 25 | item_sk, 26 | rank() 27 | OVER ( 28 | ORDER BY rank_col DESC) rnk 29 | FROM (SELECT 30 | ss_item_sk item_sk, 31 | avg(ss_net_profit) rank_col 32 | FROM store_sales ss1 33 | WHERE ss_store_sk = 4 34 | GROUP BY ss_item_sk 35 | HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col 36 | FROM store_sales 37 | WHERE ss_store_sk = 4 38 | AND ss_addr_sk IS NULL 39 | GROUP BY ss_store_sk)) V2) V21 40 | WHERE rnk < 11) descending, 41 | item i1, item i2 42 | WHERE asceding.rnk = descending.rnk 43 | AND i1.i_item_sk = asceding.item_sk 44 | AND i2.i_item_sk = descending.item_sk 45 | ORDER BY asceding.rnk 46 | LIMIT 100 47 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q45.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ca_zip, 3 | ca_city, 4 | sum(ws_sales_price) 5 | FROM web_sales, customer, customer_address, date_dim, item 6 | WHERE ws_bill_customer_sk = c_customer_sk 7 | AND c_current_addr_sk = ca_address_sk 8 | AND ws_item_sk = i_item_sk 9 | AND (substr(ca_zip, 1, 5) IN 10 | ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') 11 | OR 12 | i_item_id IN (SELECT i_item_id 13 | FROM item 14 | WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 15 | ) 16 | ) 17 | AND ws_sold_date_sk = d_date_sk 18 | AND d_qoy = 2 AND d_year = 2001 19 | GROUP BY ca_zip, ca_city 20 | ORDER BY ca_zip, ca_city 21 | LIMIT 100 22 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q46.sql: 
-------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | ca_city, 5 | bought_city, 6 | ss_ticket_number, 7 | amt, 8 | profit 9 | FROM 10 | (SELECT 11 | ss_ticket_number, 12 | ss_customer_sk, 13 | ca_city bought_city, 14 | sum(ss_coupon_amt) amt, 15 | sum(ss_net_profit) profit 16 | FROM store_sales, date_dim, store, household_demographics, customer_address 17 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 18 | AND store_sales.ss_store_sk = store.s_store_sk 19 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 20 | AND store_sales.ss_addr_sk = customer_address.ca_address_sk 21 | AND (household_demographics.hd_dep_count = 4 OR 22 | household_demographics.hd_vehicle_count = 3) 23 | AND date_dim.d_dow IN (6, 0) 24 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 25 | AND store.s_city IN ('Fairview', 'Midway', 'Fairview', 'Fairview', 'Fairview') 26 | GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer, 27 | customer_address current_addr 28 | WHERE ss_customer_sk = c_customer_sk 29 | AND customer.c_current_addr_sk = current_addr.ca_address_sk 30 | AND current_addr.ca_city <> bought_city 31 | ORDER BY c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number 32 | LIMIT 100 33 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q47.sql: -------------------------------------------------------------------------------- 1 | WITH v1 AS ( 2 | SELECT 3 | i_category, 4 | i_brand, 5 | s_store_name, 6 | s_company_name, 7 | d_year, 8 | d_moy, 9 | sum(ss_sales_price) sum_sales, 10 | avg(sum(ss_sales_price)) 11 | OVER 12 | (PARTITION BY i_category, i_brand, 13 | s_store_name, s_company_name, d_year) 14 | avg_monthly_sales, 15 | rank() 16 | OVER 17 | (PARTITION BY i_category, i_brand, 18 | s_store_name, s_company_name 19 | ORDER BY d_year, d_moy) rn 20 | FROM item, store_sales, 
date_dim, store 21 | WHERE ss_item_sk = i_item_sk AND 22 | ss_sold_date_sk = d_date_sk AND 23 | ss_store_sk = s_store_sk AND 24 | ( 25 | d_year = 1999 OR 26 | (d_year = 1999 - 1 AND d_moy = 12) OR 27 | (d_year = 1999 + 1 AND d_moy = 1) 28 | ) 29 | GROUP BY i_category, i_brand, 30 | s_store_name, s_company_name, 31 | d_year, d_moy), 32 | v2 AS ( 33 | SELECT 34 | v1.i_category, 35 | v1.i_brand, 36 | v1.s_store_name, 37 | v1.s_company_name, 38 | v1.d_year, 39 | v1.d_moy, 40 | v1.avg_monthly_sales, 41 | v1.sum_sales, 42 | v1_lag.sum_sales psum, 43 | v1_lead.sum_sales nsum 44 | FROM v1, v1 v1_lag, v1 v1_lead 45 | WHERE v1.i_category = v1_lag.i_category AND 46 | v1.i_category = v1_lead.i_category AND 47 | v1.i_brand = v1_lag.i_brand AND 48 | v1.i_brand = v1_lead.i_brand AND 49 | v1.s_store_name = v1_lag.s_store_name AND 50 | v1.s_store_name = v1_lead.s_store_name AND 51 | v1.s_company_name = v1_lag.s_company_name AND 52 | v1.s_company_name = v1_lead.s_company_name AND 53 | v1.rn = v1_lag.rn + 1 AND 54 | v1.rn = v1_lead.rn - 1) 55 | SELECT * 56 | FROM v2 57 | WHERE d_year = 1999 AND 58 | avg_monthly_sales > 0 AND 59 | CASE WHEN avg_monthly_sales > 0 60 | THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales 61 | ELSE NULL END > 0.1 62 | ORDER BY sum_sales - avg_monthly_sales, 3 63 | LIMIT 100 64 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q48.sql: -------------------------------------------------------------------------------- 1 | SELECT sum(ss_quantity) 2 | FROM store_sales, store, customer_demographics, customer_address, date_dim 3 | WHERE s_store_sk = ss_store_sk 4 | AND ss_sold_date_sk = d_date_sk AND d_year = 2001 5 | AND 6 | ( 7 | ( 8 | cd_demo_sk = ss_cdemo_sk 9 | AND 10 | cd_marital_status = 'M' 11 | AND 12 | cd_education_status = '4 yr Degree' 13 | AND 14 | ss_sales_price BETWEEN 100.00 AND 150.00 15 | ) 16 | OR 17 | ( 18 | cd_demo_sk = ss_cdemo_sk 19 | AND 20 | 
cd_marital_status = 'D' 21 | AND 22 | cd_education_status = '2 yr Degree' 23 | AND 24 | ss_sales_price BETWEEN 50.00 AND 100.00 25 | ) 26 | OR 27 | ( 28 | cd_demo_sk = ss_cdemo_sk 29 | AND 30 | cd_marital_status = 'S' 31 | AND 32 | cd_education_status = 'College' 33 | AND 34 | ss_sales_price BETWEEN 150.00 AND 200.00 35 | ) 36 | ) 37 | AND 38 | ( 39 | ( 40 | ss_addr_sk = ca_address_sk 41 | AND 42 | ca_country = 'United States' 43 | AND 44 | ca_state IN ('CO', 'OH', 'TX') 45 | AND ss_net_profit BETWEEN 0 AND 2000 46 | ) 47 | OR 48 | (ss_addr_sk = ca_address_sk 49 | AND 50 | ca_country = 'United States' 51 | AND 52 | ca_state IN ('OR', 'MN', 'KY') 53 | AND ss_net_profit BETWEEN 150 AND 3000 54 | ) 55 | OR 56 | (ss_addr_sk = ca_address_sk 57 | AND 58 | ca_country = 'United States' 59 | AND 60 | ca_state IN ('VA', 'CA', 'MS') 61 | AND ss_net_profit BETWEEN 50 AND 25000 62 | ) 63 | ) 64 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q50.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | s_store_name, 3 | s_company_id, 4 | s_street_number, 5 | s_street_name, 6 | s_street_type, 7 | s_suite_number, 8 | s_city, 9 | s_county, 10 | s_state, 11 | s_zip, 12 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk <= 30) 13 | THEN 1 14 | ELSE 0 END) AS `30 days `, 15 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 30) AND 16 | (sr_returned_date_sk - ss_sold_date_sk <= 60) 17 | THEN 1 18 | ELSE 0 END) AS `31 - 60 days `, 19 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 60) AND 20 | (sr_returned_date_sk - ss_sold_date_sk <= 90) 21 | THEN 1 22 | ELSE 0 END) AS `61 - 90 days `, 23 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 90) AND 24 | (sr_returned_date_sk - ss_sold_date_sk <= 120) 25 | THEN 1 26 | ELSE 0 END) AS `91 - 120 days `, 27 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 120) 28 | THEN 1 29 | ELSE 0 END) 
AS `>120 days ` 30 | FROM 31 | store_sales, store_returns, store, date_dim d1, date_dim d2 32 | WHERE 33 | d2.d_year = 2001 34 | AND d2.d_moy = 8 35 | AND ss_ticket_number = sr_ticket_number 36 | AND ss_item_sk = sr_item_sk 37 | AND ss_sold_date_sk = d1.d_date_sk 38 | AND sr_returned_date_sk = d2.d_date_sk 39 | AND ss_customer_sk = sr_customer_sk 40 | AND ss_store_sk = s_store_sk 41 | GROUP BY 42 | s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, 43 | s_suite_number, s_city, s_county, s_state, s_zip 44 | ORDER BY 45 | s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, 46 | s_suite_number, s_city, s_county, s_state, s_zip 47 | LIMIT 100 48 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q51.sql: -------------------------------------------------------------------------------- 1 | WITH web_v1 AS ( 2 | SELECT 3 | ws_item_sk item_sk, 4 | d_date, 5 | sum(sum(ws_sales_price)) 6 | OVER (PARTITION BY ws_item_sk 7 | ORDER BY d_date 8 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales 9 | FROM web_sales, date_dim 10 | WHERE ws_sold_date_sk = d_date_sk 11 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 12 | AND ws_item_sk IS NOT NULL 13 | GROUP BY ws_item_sk, d_date), 14 | store_v1 AS ( 15 | SELECT 16 | ss_item_sk item_sk, 17 | d_date, 18 | sum(sum(ss_sales_price)) 19 | OVER (PARTITION BY ss_item_sk 20 | ORDER BY d_date 21 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales 22 | FROM store_sales, date_dim 23 | WHERE ss_sold_date_sk = d_date_sk 24 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 25 | AND ss_item_sk IS NOT NULL 26 | GROUP BY ss_item_sk, d_date) 27 | SELECT * 28 | FROM (SELECT 29 | item_sk, 30 | d_date, 31 | web_sales, 32 | store_sales, 33 | max(web_sales) 34 | OVER (PARTITION BY item_sk 35 | ORDER BY d_date 36 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) web_cumulative, 37 | max(store_sales) 38 | OVER (PARTITION 
BY item_sk 39 | ORDER BY d_date 40 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) store_cumulative 41 | FROM (SELECT 42 | CASE WHEN web.item_sk IS NOT NULL 43 | THEN web.item_sk 44 | ELSE store.item_sk END item_sk, 45 | CASE WHEN web.d_date IS NOT NULL 46 | THEN web.d_date 47 | ELSE store.d_date END d_date, 48 | web.cume_sales web_sales, 49 | store.cume_sales store_sales 50 | FROM web_v1 web FULL OUTER JOIN store_v1 store ON (web.item_sk = store.item_sk 51 | AND web.d_date = store.d_date) 52 | ) x) y 53 | WHERE web_cumulative > store_cumulative 54 | ORDER BY item_sk, d_date 55 | LIMIT 100 56 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q52.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_brand_id brand_id, 4 | item.i_brand brand, 5 | sum(ss_ext_sales_price) ext_price 6 | FROM date_dim dt, store_sales, item 7 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk 8 | AND store_sales.ss_item_sk = item.i_item_sk 9 | AND item.i_manager_id = 1 10 | AND dt.d_moy = 11 11 | AND dt.d_year = 2000 12 | GROUP BY dt.d_year, item.i_brand, item.i_brand_id 13 | ORDER BY dt.d_year, ext_price DESC, brand_id 14 | LIMIT 100 15 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q53.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM 3 | (SELECT 4 | i_manufact_id, 5 | sum(ss_sales_price) sum_sales, 6 | avg(sum(ss_sales_price)) 7 | OVER (PARTITION BY i_manufact_id) avg_quarterly_sales 8 | FROM item, store_sales, date_dim, store 9 | WHERE ss_item_sk = i_item_sk AND 10 | ss_sold_date_sk = d_date_sk AND 11 | ss_store_sk = s_store_sk AND 12 | d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 13 | 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) AND 14 | ((i_category IN 
('Books', 'Children', 'Electronics') AND 15 | i_class IN ('personal', 'portable', 'reference', 'self-help') AND 16 | i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 17 | 'exportiunivamalg #9', 'scholaramalgamalg #9')) 18 | OR 19 | (i_category IN ('Women', 'Music', 'Men') AND 20 | i_class IN ('accessories', 'classical', 'fragrances', 'pants') AND 21 | i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 22 | 'importoamalg #1'))) 23 | GROUP BY i_manufact_id, d_qoy) tmp1 24 | WHERE CASE WHEN avg_quarterly_sales > 0 25 | THEN abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales 26 | ELSE NULL END > 0.1 27 | ORDER BY avg_quarterly_sales, 28 | sum_sales, 29 | i_manufact_id 30 | LIMIT 100 31 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q54.sql: -------------------------------------------------------------------------------- 1 | WITH my_customers AS ( 2 | SELECT DISTINCT 3 | c_customer_sk, 4 | c_current_addr_sk 5 | FROM 6 | (SELECT 7 | cs_sold_date_sk sold_date_sk, 8 | cs_bill_customer_sk customer_sk, 9 | cs_item_sk item_sk 10 | FROM catalog_sales 11 | UNION ALL 12 | SELECT 13 | ws_sold_date_sk sold_date_sk, 14 | ws_bill_customer_sk customer_sk, 15 | ws_item_sk item_sk 16 | FROM web_sales 17 | ) cs_or_ws_sales, 18 | item, 19 | date_dim, 20 | customer 21 | WHERE sold_date_sk = d_date_sk 22 | AND item_sk = i_item_sk 23 | AND i_category = 'Women' 24 | AND i_class = 'maternity' 25 | AND c_customer_sk = cs_or_ws_sales.customer_sk 26 | AND d_moy = 12 27 | AND d_year = 1998 28 | ) 29 | , my_revenue AS ( 30 | SELECT 31 | c_customer_sk, 32 | sum(ss_ext_sales_price) AS revenue 33 | FROM my_customers, 34 | store_sales, 35 | customer_address, 36 | store, 37 | date_dim 38 | WHERE c_current_addr_sk = ca_address_sk 39 | AND ca_county = s_county 40 | AND ca_state = s_state 41 | AND ss_sold_date_sk = d_date_sk 42 | AND c_customer_sk = ss_customer_sk 43 | AND d_month_seq 
BETWEEN (SELECT DISTINCT d_month_seq + 1 44 | FROM date_dim 45 | WHERE d_year = 1998 AND d_moy = 12) 46 | AND (SELECT DISTINCT d_month_seq + 3 47 | FROM date_dim 48 | WHERE d_year = 1998 AND d_moy = 12) 49 | GROUP BY c_customer_sk 50 | ) 51 | , segments AS 52 | (SELECT cast((revenue / 50) AS INT) AS segment 53 | FROM my_revenue) 54 | SELECT 55 | segment, 56 | count(*) AS num_customers, 57 | segment * 50 AS segment_base 58 | FROM segments 59 | GROUP BY segment 60 | ORDER BY segment, num_customers 61 | LIMIT 100 62 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q55.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_brand_id brand_id, 3 | i_brand brand, 4 | sum(ss_ext_sales_price) ext_price 5 | FROM date_dim, store_sales, item 6 | WHERE d_date_sk = ss_sold_date_sk 7 | AND ss_item_sk = i_item_sk 8 | AND i_manager_id = 28 9 | AND d_moy = 11 10 | AND d_year = 1999 11 | GROUP BY i_brand, i_brand_id 12 | ORDER BY ext_price DESC, brand_id 13 | LIMIT 100 14 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q56.sql: -------------------------------------------------------------------------------- 1 | WITH ss AS ( 2 | SELECT 3 | i_item_id, 4 | sum(ss_ext_sales_price) total_sales 5 | FROM 6 | store_sales, date_dim, customer_address, item 7 | WHERE 8 | i_item_id IN (SELECT i_item_id 9 | FROM item 10 | WHERE i_color IN ('slate', 'blanched', 'burnished')) 11 | AND ss_item_sk = i_item_sk 12 | AND ss_sold_date_sk = d_date_sk 13 | AND d_year = 2001 14 | AND d_moy = 2 15 | AND ss_addr_sk = ca_address_sk 16 | AND ca_gmt_offset = -5 17 | GROUP BY i_item_id), 18 | cs AS ( 19 | SELECT 20 | i_item_id, 21 | sum(cs_ext_sales_price) total_sales 22 | FROM 23 | catalog_sales, date_dim, customer_address, item 24 | WHERE 25 | i_item_id IN (SELECT i_item_id 26 | FROM item 27 | WHERE i_color IN 
('slate', 'blanched', 'burnished')) 28 | AND cs_item_sk = i_item_sk 29 | AND cs_sold_date_sk = d_date_sk 30 | AND d_year = 2001 31 | AND d_moy = 2 32 | AND cs_bill_addr_sk = ca_address_sk 33 | AND ca_gmt_offset = -5 34 | GROUP BY i_item_id), 35 | ws AS ( 36 | SELECT 37 | i_item_id, 38 | sum(ws_ext_sales_price) total_sales 39 | FROM 40 | web_sales, date_dim, customer_address, item 41 | WHERE 42 | i_item_id IN (SELECT i_item_id 43 | FROM item 44 | WHERE i_color IN ('slate', 'blanched', 'burnished')) 45 | AND ws_item_sk = i_item_sk 46 | AND ws_sold_date_sk = d_date_sk 47 | AND d_year = 2001 48 | AND d_moy = 2 49 | AND ws_bill_addr_sk = ca_address_sk 50 | AND ca_gmt_offset = -5 51 | GROUP BY i_item_id) 52 | SELECT 53 | i_item_id, 54 | sum(total_sales) total_sales 55 | FROM (SELECT * 56 | FROM ss 57 | UNION ALL 58 | SELECT * 59 | FROM cs 60 | UNION ALL 61 | SELECT * 62 | FROM ws) tmp1 63 | GROUP BY i_item_id 64 | ORDER BY total_sales 65 | LIMIT 100 66 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q57.sql: -------------------------------------------------------------------------------- 1 | WITH v1 AS ( 2 | SELECT 3 | i_category, 4 | i_brand, 5 | cc_name, 6 | d_year, 7 | d_moy, 8 | sum(cs_sales_price) sum_sales, 9 | avg(sum(cs_sales_price)) 10 | OVER 11 | (PARTITION BY i_category, i_brand, cc_name, d_year) 12 | avg_monthly_sales, 13 | rank() 14 | OVER 15 | (PARTITION BY i_category, i_brand, cc_name 16 | ORDER BY d_year, d_moy) rn 17 | FROM item, catalog_sales, date_dim, call_center 18 | WHERE cs_item_sk = i_item_sk AND 19 | cs_sold_date_sk = d_date_sk AND 20 | cc_call_center_sk = cs_call_center_sk AND 21 | ( 22 | d_year = 1999 OR 23 | (d_year = 1999 - 1 AND d_moy = 12) OR 24 | (d_year = 1999 + 1 AND d_moy = 1) 25 | ) 26 | GROUP BY i_category, i_brand, 27 | cc_name, d_year, d_moy), 28 | v2 AS ( 29 | SELECT 30 | v1.i_category, 31 | v1.i_brand, 32 | v1.cc_name, 33 | v1.d_year, 34 | v1.d_moy, 35 
| v1.avg_monthly_sales, 36 | v1.sum_sales, 37 | v1_lag.sum_sales psum, 38 | v1_lead.sum_sales nsum 39 | FROM v1, v1 v1_lag, v1 v1_lead 40 | WHERE v1.i_category = v1_lag.i_category AND 41 | v1.i_category = v1_lead.i_category AND 42 | v1.i_brand = v1_lag.i_brand AND 43 | v1.i_brand = v1_lead.i_brand AND 44 | v1.cc_name = v1_lag.cc_name AND 45 | v1.cc_name = v1_lead.cc_name AND 46 | v1.rn = v1_lag.rn + 1 AND 47 | v1.rn = v1_lead.rn - 1) 48 | SELECT * 49 | FROM v2 50 | WHERE d_year = 1999 AND 51 | avg_monthly_sales > 0 AND 52 | CASE WHEN avg_monthly_sales > 0 53 | THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales 54 | ELSE NULL END > 0.1 55 | ORDER BY sum_sales - avg_monthly_sales, 3 56 | LIMIT 100 57 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q58.sql: -------------------------------------------------------------------------------- 1 | WITH ss_items AS 2 | (SELECT 3 | i_item_id item_id, 4 | sum(ss_ext_sales_price) ss_item_rev 5 | FROM store_sales, item, date_dim 6 | WHERE ss_item_sk = i_item_sk 7 | AND d_date IN (SELECT d_date 8 | FROM date_dim 9 | WHERE d_week_seq = (SELECT d_week_seq 10 | FROM date_dim 11 | WHERE d_date = '2000-01-03')) 12 | AND ss_sold_date_sk = d_date_sk 13 | GROUP BY i_item_id), 14 | cs_items AS 15 | (SELECT 16 | i_item_id item_id, 17 | sum(cs_ext_sales_price) cs_item_rev 18 | FROM catalog_sales, item, date_dim 19 | WHERE cs_item_sk = i_item_sk 20 | AND d_date IN (SELECT d_date 21 | FROM date_dim 22 | WHERE d_week_seq = (SELECT d_week_seq 23 | FROM date_dim 24 | WHERE d_date = '2000-01-03')) 25 | AND cs_sold_date_sk = d_date_sk 26 | GROUP BY i_item_id), 27 | ws_items AS 28 | (SELECT 29 | i_item_id item_id, 30 | sum(ws_ext_sales_price) ws_item_rev 31 | FROM web_sales, item, date_dim 32 | WHERE ws_item_sk = i_item_sk 33 | AND d_date IN (SELECT d_date 34 | FROM date_dim 35 | WHERE d_week_seq = (SELECT d_week_seq 36 | FROM date_dim 37 | WHERE d_date = 
'2000-01-03')) 38 | AND ws_sold_date_sk = d_date_sk 39 | GROUP BY i_item_id) 40 | SELECT 41 | ss_items.item_id, 42 | ss_item_rev, 43 | ss_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ss_dev, 44 | cs_item_rev, 45 | cs_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 cs_dev, 46 | ws_item_rev, 47 | ws_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ws_dev, 48 | (ss_item_rev + cs_item_rev + ws_item_rev) / 3 average 49 | FROM ss_items, cs_items, ws_items 50 | WHERE ss_items.item_id = cs_items.item_id 51 | AND ss_items.item_id = ws_items.item_id 52 | AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev 53 | AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev 54 | AND cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev 55 | AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev 56 | AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev 57 | AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev 58 | ORDER BY item_id, ss_item_rev 59 | LIMIT 100 60 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q59.sql: -------------------------------------------------------------------------------- 1 | WITH wss AS 2 | (SELECT 3 | d_week_seq, 4 | ss_store_sk, 5 | sum(CASE WHEN (d_day_name = 'Sunday') 6 | THEN ss_sales_price 7 | ELSE NULL END) sun_sales, 8 | sum(CASE WHEN (d_day_name = 'Monday') 9 | THEN ss_sales_price 10 | ELSE NULL END) mon_sales, 11 | sum(CASE WHEN (d_day_name = 'Tuesday') 12 | THEN ss_sales_price 13 | ELSE NULL END) tue_sales, 14 | sum(CASE WHEN (d_day_name = 'Wednesday') 15 | THEN ss_sales_price 16 | ELSE NULL END) wed_sales, 17 | sum(CASE WHEN (d_day_name = 'Thursday') 18 | THEN ss_sales_price 19 | ELSE NULL END) thu_sales, 20 | sum(CASE WHEN (d_day_name = 'Friday') 21 | THEN ss_sales_price 22 | ELSE NULL END) fri_sales, 23 | sum(CASE WHEN (d_day_name = 'Saturday') 24 | THEN 
ss_sales_price 25 | ELSE NULL END) sat_sales 26 | FROM store_sales, date_dim 27 | WHERE d_date_sk = ss_sold_date_sk 28 | GROUP BY d_week_seq, ss_store_sk 29 | ) 30 | SELECT 31 | s_store_name1, 32 | s_store_id1, 33 | d_week_seq1, 34 | sun_sales1 / sun_sales2, 35 | mon_sales1 / mon_sales2, 36 | tue_sales1 / tue_sales2, 37 | wed_sales1 / wed_sales2, 38 | thu_sales1 / thu_sales2, 39 | fri_sales1 / fri_sales2, 40 | sat_sales1 / sat_sales2 41 | FROM 42 | (SELECT 43 | s_store_name s_store_name1, 44 | wss.d_week_seq d_week_seq1, 45 | s_store_id s_store_id1, 46 | sun_sales sun_sales1, 47 | mon_sales mon_sales1, 48 | tue_sales tue_sales1, 49 | wed_sales wed_sales1, 50 | thu_sales thu_sales1, 51 | fri_sales fri_sales1, 52 | sat_sales sat_sales1 53 | FROM wss, store, date_dim d 54 | WHERE d.d_week_seq = wss.d_week_seq AND 55 | ss_store_sk = s_store_sk AND 56 | d_month_seq BETWEEN 1212 AND 1212 + 11) y, 57 | (SELECT 58 | s_store_name s_store_name2, 59 | wss.d_week_seq d_week_seq2, 60 | s_store_id s_store_id2, 61 | sun_sales sun_sales2, 62 | mon_sales mon_sales2, 63 | tue_sales tue_sales2, 64 | wed_sales wed_sales2, 65 | thu_sales thu_sales2, 66 | fri_sales fri_sales2, 67 | sat_sales sat_sales2 68 | FROM wss, store, date_dim d 69 | WHERE d.d_week_seq = wss.d_week_seq AND 70 | ss_store_sk = s_store_sk AND 71 | d_month_seq BETWEEN 1212 + 12 AND 1212 + 23) x 72 | WHERE s_store_id1 = s_store_id2 73 | AND d_week_seq1 = d_week_seq2 - 52 74 | ORDER BY s_store_name1, s_store_id1, d_week_seq1 75 | LIMIT 100 76 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q6.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.ca_state state, 3 | count(*) cnt 4 | FROM 5 | customer_address a, customer c, store_sales s, date_dim d, item i 6 | WHERE a.ca_address_sk = c.c_current_addr_sk 7 | AND c.c_customer_sk = s.ss_customer_sk 8 | AND s.ss_sold_date_sk = d.d_date_sk 9 | AND 
s.ss_item_sk = i.i_item_sk 10 | AND d.d_month_seq = 11 | (SELECT DISTINCT (d_month_seq) 12 | FROM date_dim 13 | WHERE d_year = 2000 AND d_moy = 1) 14 | AND i.i_current_price > 1.2 * 15 | (SELECT avg(j.i_current_price) 16 | FROM item j 17 | WHERE j.i_category = i.i_category) 18 | GROUP BY a.ca_state 19 | HAVING count(*) >= 10 20 | ORDER BY cnt 21 | LIMIT 100 22 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q60.sql: -------------------------------------------------------------------------------- 1 | WITH ss AS ( 2 | SELECT 3 | i_item_id, 4 | sum(ss_ext_sales_price) total_sales 5 | FROM store_sales, date_dim, customer_address, item 6 | WHERE 7 | i_item_id IN (SELECT i_item_id 8 | FROM item 9 | WHERE i_category IN ('Music')) 10 | AND ss_item_sk = i_item_sk 11 | AND ss_sold_date_sk = d_date_sk 12 | AND d_year = 1998 13 | AND d_moy = 9 14 | AND ss_addr_sk = ca_address_sk 15 | AND ca_gmt_offset = -5 16 | GROUP BY i_item_id), 17 | cs AS ( 18 | SELECT 19 | i_item_id, 20 | sum(cs_ext_sales_price) total_sales 21 | FROM catalog_sales, date_dim, customer_address, item 22 | WHERE 23 | i_item_id IN (SELECT i_item_id 24 | FROM item 25 | WHERE i_category IN ('Music')) 26 | AND cs_item_sk = i_item_sk 27 | AND cs_sold_date_sk = d_date_sk 28 | AND d_year = 1998 29 | AND d_moy = 9 30 | AND cs_bill_addr_sk = ca_address_sk 31 | AND ca_gmt_offset = -5 32 | GROUP BY i_item_id), 33 | ws AS ( 34 | SELECT 35 | i_item_id, 36 | sum(ws_ext_sales_price) total_sales 37 | FROM web_sales, date_dim, customer_address, item 38 | WHERE 39 | i_item_id IN (SELECT i_item_id 40 | FROM item 41 | WHERE i_category IN ('Music')) 42 | AND ws_item_sk = i_item_sk 43 | AND ws_sold_date_sk = d_date_sk 44 | AND d_year = 1998 45 | AND d_moy = 9 46 | AND ws_bill_addr_sk = ca_address_sk 47 | AND ca_gmt_offset = -5 48 | GROUP BY i_item_id) 49 | SELECT 50 | i_item_id, 51 | sum(total_sales) total_sales 52 | FROM (SELECT * 53 | FROM ss 54 | 
UNION ALL 55 | SELECT * 56 | FROM cs 57 | UNION ALL 58 | SELECT * 59 | FROM ws) tmp1 60 | GROUP BY i_item_id 61 | ORDER BY i_item_id, total_sales 62 | LIMIT 100 63 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q61.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | promotions, 3 | total, 4 | cast(promotions AS DECIMAL(15, 4)) / cast(total AS DECIMAL(15, 4)) * 100 5 | FROM 6 | (SELECT sum(ss_ext_sales_price) promotions 7 | FROM store_sales, store, promotion, date_dim, customer, customer_address, item 8 | WHERE ss_sold_date_sk = d_date_sk 9 | AND ss_store_sk = s_store_sk 10 | AND ss_promo_sk = p_promo_sk 11 | AND ss_customer_sk = c_customer_sk 12 | AND ca_address_sk = c_current_addr_sk 13 | AND ss_item_sk = i_item_sk 14 | AND ca_gmt_offset = -5 15 | AND i_category = 'Jewelry' 16 | AND (p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y') 17 | AND s_gmt_offset = -5 18 | AND d_year = 1998 19 | AND d_moy = 11) promotional_sales, 20 | (SELECT sum(ss_ext_sales_price) total 21 | FROM store_sales, store, date_dim, customer, customer_address, item 22 | WHERE ss_sold_date_sk = d_date_sk 23 | AND ss_store_sk = s_store_sk 24 | AND ss_customer_sk = c_customer_sk 25 | AND ca_address_sk = c_current_addr_sk 26 | AND ss_item_sk = i_item_sk 27 | AND ca_gmt_offset = -5 28 | AND i_category = 'Jewelry' 29 | AND s_gmt_offset = -5 30 | AND d_year = 1998 31 | AND d_moy = 11) all_sales 32 | ORDER BY promotions, total 33 | LIMIT 100 34 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q62.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | substr(w_warehouse_name, 1, 20), 3 | sm_type, 4 | web_name, 5 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk <= 30) 6 | THEN 1 7 | ELSE 0 END) AS `30 days `, 8 | sum(CASE WHEN 
(ws_ship_date_sk - ws_sold_date_sk > 30) AND 9 | (ws_ship_date_sk - ws_sold_date_sk <= 60) 10 | THEN 1 11 | ELSE 0 END) AS `31 - 60 days `, 12 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 60) AND 13 | (ws_ship_date_sk - ws_sold_date_sk <= 90) 14 | THEN 1 15 | ELSE 0 END) AS `61 - 90 days `, 16 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 90) AND 17 | (ws_ship_date_sk - ws_sold_date_sk <= 120) 18 | THEN 1 19 | ELSE 0 END) AS `91 - 120 days `, 20 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 120) 21 | THEN 1 22 | ELSE 0 END) AS `>120 days ` 23 | FROM 24 | web_sales, warehouse, ship_mode, web_site, date_dim 25 | WHERE 26 | d_month_seq BETWEEN 1200 AND 1200 + 11 27 | AND ws_ship_date_sk = d_date_sk 28 | AND ws_warehouse_sk = w_warehouse_sk 29 | AND ws_ship_mode_sk = sm_ship_mode_sk 30 | AND ws_web_site_sk = web_site_sk 31 | GROUP BY 32 | substr(w_warehouse_name, 1, 20), sm_type, web_name 33 | ORDER BY 34 | substr(w_warehouse_name, 1, 20), sm_type, web_name 35 | LIMIT 100 36 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q63.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM (SELECT 3 | i_manager_id, 4 | sum(ss_sales_price) sum_sales, 5 | avg(sum(ss_sales_price)) 6 | OVER (PARTITION BY i_manager_id) avg_monthly_sales 7 | FROM item 8 | , store_sales 9 | , date_dim 10 | , store 11 | WHERE ss_item_sk = i_item_sk 12 | AND ss_sold_date_sk = d_date_sk 13 | AND ss_store_sk = s_store_sk 14 | AND d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, 15 | 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) 16 | AND ((i_category IN ('Books', 'Children', 'Electronics') 17 | AND i_class IN ('personal', 'portable', 'refernece', 'self-help') 18 | AND i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 19 | 'exportiunivamalg #9', 'scholaramalgamalg #9')) 20 | OR (i_category IN ('Women', 
'Music', 'Men') 21 | AND i_class IN ('accessories', 'classical', 'fragrances', 'pants') 22 | AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 23 | 'importoamalg #1'))) 24 | GROUP BY i_manager_id, d_moy) tmp1 25 | WHERE CASE WHEN avg_monthly_sales > 0 26 | THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales 27 | ELSE NULL END > 0.1 28 | ORDER BY i_manager_id 29 | , avg_monthly_sales 30 | , sum_sales 31 | LIMIT 100 32 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q64.sql: -------------------------------------------------------------------------------- 1 | WITH cs_ui AS 2 | (SELECT 3 | cs_item_sk, 4 | sum(cs_ext_list_price) AS sale, 5 | sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit) AS refund 6 | FROM catalog_sales 7 | , catalog_returns 8 | WHERE cs_item_sk = cr_item_sk 9 | AND cs_order_number = cr_order_number 10 | GROUP BY cs_item_sk 11 | HAVING sum(cs_ext_list_price) > 2 * sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit)), 12 | cross_sales AS 13 | (SELECT 14 | i_product_name product_name, 15 | i_item_sk item_sk, 16 | s_store_name store_name, 17 | s_zip store_zip, 18 | ad1.ca_street_number b_street_number, 19 | ad1.ca_street_name b_streen_name, 20 | ad1.ca_city b_city, 21 | ad1.ca_zip b_zip, 22 | ad2.ca_street_number c_street_number, 23 | ad2.ca_street_name c_street_name, 24 | ad2.ca_city c_city, 25 | ad2.ca_zip c_zip, 26 | d1.d_year AS syear, 27 | d2.d_year AS fsyear, 28 | d3.d_year s2year, 29 | count(*) cnt, 30 | sum(ss_wholesale_cost) s1, 31 | sum(ss_list_price) s2, 32 | sum(ss_coupon_amt) s3 33 | FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3, 34 | store, customer, customer_demographics cd1, customer_demographics cd2, 35 | promotion, household_demographics hd1, household_demographics hd2, 36 | customer_address ad1, customer_address ad2, income_band ib1, income_band ib2, item 37 | WHERE 
ss_store_sk = s_store_sk AND 38 | ss_sold_date_sk = d1.d_date_sk AND 39 | ss_customer_sk = c_customer_sk AND 40 | ss_cdemo_sk = cd1.cd_demo_sk AND 41 | ss_hdemo_sk = hd1.hd_demo_sk AND 42 | ss_addr_sk = ad1.ca_address_sk AND 43 | ss_item_sk = i_item_sk AND 44 | ss_item_sk = sr_item_sk AND 45 | ss_ticket_number = sr_ticket_number AND 46 | ss_item_sk = cs_ui.cs_item_sk AND 47 | c_current_cdemo_sk = cd2.cd_demo_sk AND 48 | c_current_hdemo_sk = hd2.hd_demo_sk AND 49 | c_current_addr_sk = ad2.ca_address_sk AND 50 | c_first_sales_date_sk = d2.d_date_sk AND 51 | c_first_shipto_date_sk = d3.d_date_sk AND 52 | ss_promo_sk = p_promo_sk AND 53 | hd1.hd_income_band_sk = ib1.ib_income_band_sk AND 54 | hd2.hd_income_band_sk = ib2.ib_income_band_sk AND 55 | cd1.cd_marital_status <> cd2.cd_marital_status AND 56 | i_color IN ('purple', 'burlywood', 'indian', 'spring', 'floral', 'medium') AND 57 | i_current_price BETWEEN 64 AND 64 + 10 AND 58 | i_current_price BETWEEN 64 + 1 AND 64 + 15 59 | GROUP BY i_product_name, i_item_sk, s_store_name, s_zip, ad1.ca_street_number, 60 | ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number, 61 | ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year 62 | ) 63 | SELECT 64 | cs1.product_name, 65 | cs1.store_name, 66 | cs1.store_zip, 67 | cs1.b_street_number, 68 | cs1.b_streen_name, 69 | cs1.b_city, 70 | cs1.b_zip, 71 | cs1.c_street_number, 72 | cs1.c_street_name, 73 | cs1.c_city, 74 | cs1.c_zip, 75 | cs1.syear, 76 | cs1.cnt, 77 | cs1.s1, 78 | cs1.s2, 79 | cs1.s3, 80 | cs2.s1, 81 | cs2.s2, 82 | cs2.s3, 83 | cs2.syear, 84 | cs2.cnt 85 | FROM cross_sales cs1, cross_sales cs2 86 | WHERE cs1.item_sk = cs2.item_sk AND 87 | cs1.syear = 1999 AND 88 | cs2.syear = 1999 + 1 AND 89 | cs2.cnt <= cs1.cnt AND 90 | cs1.store_name = cs2.store_name AND 91 | cs1.store_zip = cs2.store_zip 92 | ORDER BY cs1.product_name, cs1.store_name, cs2.cnt 93 | -------------------------------------------------------------------------------- 
/src/test/resources/tpcds-flow-tests/inputs/q65.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | s_store_name, 3 | i_item_desc, 4 | sc.revenue, 5 | i_current_price, 6 | i_wholesale_cost, 7 | i_brand 8 | FROM store, item, 9 | (SELECT 10 | ss_store_sk, 11 | avg(revenue) AS ave 12 | FROM 13 | (SELECT 14 | ss_store_sk, 15 | ss_item_sk, 16 | sum(ss_sales_price) AS revenue 17 | FROM store_sales, date_dim 18 | WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 19 | GROUP BY ss_store_sk, ss_item_sk) sa 20 | GROUP BY ss_store_sk) sb, 21 | (SELECT 22 | ss_store_sk, 23 | ss_item_sk, 24 | sum(ss_sales_price) AS revenue 25 | FROM store_sales, date_dim 26 | WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 27 | GROUP BY ss_store_sk, ss_item_sk) sc 28 | WHERE sb.ss_store_sk = sc.ss_store_sk AND 29 | sc.revenue <= 0.1 * sb.ave AND 30 | s_store_sk = sc.ss_store_sk AND 31 | i_item_sk = sc.ss_item_sk 32 | ORDER BY s_store_name, i_item_desc 33 | LIMIT 100 34 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q67.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM 3 | (SELECT 4 | i_category, 5 | i_class, 6 | i_brand, 7 | i_product_name, 8 | d_year, 9 | d_qoy, 10 | d_moy, 11 | s_store_id, 12 | sumsales, 13 | rank() 14 | OVER (PARTITION BY i_category 15 | ORDER BY sumsales DESC) rk 16 | FROM 17 | (SELECT 18 | i_category, 19 | i_class, 20 | i_brand, 21 | i_product_name, 22 | d_year, 23 | d_qoy, 24 | d_moy, 25 | s_store_id, 26 | sum(coalesce(ss_sales_price * ss_quantity, 0)) sumsales 27 | FROM store_sales, date_dim, store, item 28 | WHERE ss_sold_date_sk = d_date_sk 29 | AND ss_item_sk = i_item_sk 30 | AND ss_store_sk = s_store_sk 31 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 32 | GROUP BY ROLLUP (i_category, i_class, i_brand, i_product_name, d_year, 
d_qoy, 33 | d_moy, s_store_id)) dw1) dw2 34 | WHERE rk <= 100 35 | ORDER BY 36 | i_category, i_class, i_brand, i_product_name, d_year, 37 | d_qoy, d_moy, s_store_id, sumsales, rk 38 | LIMIT 100 39 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q68.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | ca_city, 5 | bought_city, 6 | ss_ticket_number, 7 | extended_price, 8 | extended_tax, 9 | list_price 10 | FROM (SELECT 11 | ss_ticket_number, 12 | ss_customer_sk, 13 | ca_city bought_city, 14 | sum(ss_ext_sales_price) extended_price, 15 | sum(ss_ext_list_price) list_price, 16 | sum(ss_ext_tax) extended_tax 17 | FROM store_sales, date_dim, store, household_demographics, customer_address 18 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 19 | AND store_sales.ss_store_sk = store.s_store_sk 20 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 21 | AND store_sales.ss_addr_sk = customer_address.ca_address_sk 22 | AND date_dim.d_dom BETWEEN 1 AND 2 23 | AND (household_demographics.hd_dep_count = 4 OR 24 | household_demographics.hd_vehicle_count = 3) 25 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 26 | AND store.s_city IN ('Midway', 'Fairview') 27 | GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, 28 | customer, 29 | customer_address current_addr 30 | WHERE ss_customer_sk = c_customer_sk 31 | AND customer.c_current_addr_sk = current_addr.ca_address_sk 32 | AND current_addr.ca_city <> bought_city 33 | ORDER BY c_last_name, ss_ticket_number 34 | LIMIT 100 35 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q69.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cd_gender, 3 | cd_marital_status, 4 | cd_education_status, 5 | count(*) cnt1, 6 | 
cd_purchase_estimate, 7 | count(*) cnt2, 8 | cd_credit_rating, 9 | count(*) cnt3 10 | FROM 11 | customer c, customer_address ca, customer_demographics 12 | WHERE 13 | c.c_current_addr_sk = ca.ca_address_sk AND 14 | ca_state IN ('KY', 'GA', 'NM') AND 15 | cd_demo_sk = c.c_current_cdemo_sk AND 16 | exists(SELECT * 17 | FROM store_sales, date_dim 18 | WHERE c.c_customer_sk = ss_customer_sk AND 19 | ss_sold_date_sk = d_date_sk AND 20 | d_year = 2001 AND 21 | d_moy BETWEEN 4 AND 4 + 2) AND 22 | (NOT exists(SELECT * 23 | FROM web_sales, date_dim 24 | WHERE c.c_customer_sk = ws_bill_customer_sk AND 25 | ws_sold_date_sk = d_date_sk AND 26 | d_year = 2001 AND 27 | d_moy BETWEEN 4 AND 4 + 2) AND 28 | NOT exists(SELECT * 29 | FROM catalog_sales, date_dim 30 | WHERE c.c_customer_sk = cs_ship_customer_sk AND 31 | cs_sold_date_sk = d_date_sk AND 32 | d_year = 2001 AND 33 | d_moy BETWEEN 4 AND 4 + 2)) 34 | GROUP BY cd_gender, cd_marital_status, cd_education_status, 35 | cd_purchase_estimate, cd_credit_rating 36 | ORDER BY cd_gender, cd_marital_status, cd_education_status, 37 | cd_purchase_estimate, cd_credit_rating 38 | LIMIT 100 39 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q7.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | FROM store_sales, customer_demographics, date_dim, item, promotion 8 | WHERE ss_sold_date_sk = d_date_sk AND 9 | ss_item_sk = i_item_sk AND 10 | ss_cdemo_sk = cd_demo_sk AND 11 | ss_promo_sk = p_promo_sk AND 12 | cd_gender = 'M' AND 13 | cd_marital_status = 'S' AND 14 | cd_education_status = 'College' AND 15 | (p_channel_email = 'N' OR p_channel_event = 'N') AND 16 | d_year = 2000 17 | GROUP BY i_item_id 18 | ORDER BY i_item_id 19 | LIMIT 100 20 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q70.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sum(ss_net_profit) AS total_sum, 3 | s_state, 4 | s_county, 5 | grouping(s_state) + grouping(s_county) AS lochierarchy, 6 | rank() 7 | OVER ( 8 | PARTITION BY grouping(s_state) + grouping(s_county), 9 | CASE WHEN grouping(s_county) = 0 10 | THEN s_state END 11 | ORDER BY sum(ss_net_profit) DESC) AS rank_within_parent 12 | FROM 13 | store_sales, date_dim d1, store 14 | WHERE 15 | d1.d_month_seq BETWEEN 1200 AND 1200 + 11 16 | AND d1.d_date_sk = ss_sold_date_sk 17 | AND s_store_sk = ss_store_sk 18 | AND s_state IN 19 | (SELECT s_state 20 | FROM 21 | (SELECT 22 | s_state AS s_state, 23 | rank() 24 | OVER (PARTITION BY s_state 25 | ORDER BY sum(ss_net_profit) DESC) AS ranking 26 | FROM store_sales, store, date_dim 27 | WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 28 | AND d_date_sk = ss_sold_date_sk 29 | AND s_store_sk = ss_store_sk 30 | GROUP BY s_state) tmp1 31 | WHERE ranking <= 5) 32 | GROUP BY ROLLUP (s_state, s_county) 33 | ORDER BY 34 | lochierarchy DESC 35 | , CASE WHEN lochierarchy = 0 36 | THEN s_state END 37 | , rank_within_parent 38 | LIMIT 100 39 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q71.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_brand_id brand_id, 3 | i_brand brand, 4 | t_hour, 5 | t_minute, 6 | sum(ext_price) ext_price 7 | FROM item, 8 | (SELECT 9 | ws_ext_sales_price AS ext_price, 10 | ws_sold_date_sk AS sold_date_sk, 11 | ws_item_sk AS sold_item_sk, 12 | ws_sold_time_sk AS time_sk 13 | FROM web_sales, date_dim 14 | WHERE d_date_sk = ws_sold_date_sk 15 | AND d_moy = 11 16 | AND d_year = 1999 17 | UNION ALL 18 | SELECT 19 | cs_ext_sales_price AS ext_price, 20 | cs_sold_date_sk AS 
sold_date_sk, 21 | cs_item_sk AS sold_item_sk, 22 | cs_sold_time_sk AS time_sk 23 | FROM catalog_sales, date_dim 24 | WHERE d_date_sk = cs_sold_date_sk 25 | AND d_moy = 11 26 | AND d_year = 1999 27 | UNION ALL 28 | SELECT 29 | ss_ext_sales_price AS ext_price, 30 | ss_sold_date_sk AS sold_date_sk, 31 | ss_item_sk AS sold_item_sk, 32 | ss_sold_time_sk AS time_sk 33 | FROM store_sales, date_dim 34 | WHERE d_date_sk = ss_sold_date_sk 35 | AND d_moy = 11 36 | AND d_year = 1999 37 | ) AS tmp, time_dim 38 | WHERE 39 | sold_item_sk = i_item_sk 40 | AND i_manager_id = 1 41 | AND time_sk = t_time_sk 42 | AND (t_meal_time = 'breakfast' OR t_meal_time = 'dinner') 43 | GROUP BY i_brand, i_brand_id, t_hour, t_minute 44 | ORDER BY ext_price DESC, brand_id 45 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q72.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_desc, 3 | w_warehouse_name, 4 | d1.d_week_seq, 5 | count(CASE WHEN p_promo_sk IS NULL 6 | THEN 1 7 | ELSE 0 END) no_promo, 8 | count(CASE WHEN p_promo_sk IS NOT NULL 9 | THEN 1 10 | ELSE 0 END) promo, 11 | count(*) total_cnt 12 | FROM catalog_sales 13 | JOIN inventory ON (cs_item_sk = inv_item_sk) 14 | JOIN warehouse ON (w_warehouse_sk = inv_warehouse_sk) 15 | JOIN item ON (i_item_sk = cs_item_sk) 16 | JOIN customer_demographics ON (cs_bill_cdemo_sk = cd_demo_sk) 17 | JOIN household_demographics ON (cs_bill_hdemo_sk = hd_demo_sk) 18 | JOIN date_dim d1 ON (cs_sold_date_sk = d1.d_date_sk) 19 | JOIN date_dim d2 ON (inv_date_sk = d2.d_date_sk) 20 | JOIN date_dim d3 ON (cs_ship_date_sk = d3.d_date_sk) 21 | LEFT OUTER JOIN promotion ON (cs_promo_sk = p_promo_sk) 22 | LEFT OUTER JOIN catalog_returns ON (cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number) 23 | WHERE d1.d_week_seq = d2.d_week_seq 24 | AND inv_quantity_on_hand < cs_quantity 25 | AND d3.d_date > (cast(d1.d_date AS DATE) + 
interval 5 days) 26 | AND hd_buy_potential = '>10000' 27 | AND d1.d_year = 1999 28 | AND hd_buy_potential = '>10000' 29 | AND cd_marital_status = 'D' 30 | AND d1.d_year = 1999 31 | GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq 32 | ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq 33 | LIMIT 100 34 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q73.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | c_salutation, 5 | c_preferred_cust_flag, 6 | ss_ticket_number, 7 | cnt 8 | FROM 9 | (SELECT 10 | ss_ticket_number, 11 | ss_customer_sk, 12 | count(*) cnt 13 | FROM store_sales, date_dim, store, household_demographics 14 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 15 | AND store_sales.ss_store_sk = store.s_store_sk 16 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 17 | AND date_dim.d_dom BETWEEN 1 AND 2 18 | AND (household_demographics.hd_buy_potential = '>10000' OR 19 | household_demographics.hd_buy_potential = 'unknown') 20 | AND household_demographics.hd_vehicle_count > 0 21 | AND CASE WHEN household_demographics.hd_vehicle_count > 0 22 | THEN 23 | household_demographics.hd_dep_count / household_demographics.hd_vehicle_count 24 | ELSE NULL END > 1 25 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 26 | AND store.s_county IN ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange County') 27 | GROUP BY ss_ticket_number, ss_customer_sk) dj, customer 28 | WHERE ss_customer_sk = c_customer_sk 29 | AND cnt BETWEEN 1 AND 5 30 | ORDER BY cnt DESC 31 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q74.sql: -------------------------------------------------------------------------------- 1 | WITH year_total AS ( 2 | SELECT 3 | c_customer_id customer_id, 4 | c_first_name 
customer_first_name, 5 | c_last_name customer_last_name, 6 | d_year AS year, 7 | sum(ss_net_paid) year_total, 8 | 's' sale_type 9 | FROM 10 | customer, store_sales, date_dim 11 | WHERE c_customer_sk = ss_customer_sk 12 | AND ss_sold_date_sk = d_date_sk 13 | AND d_year IN (2001, 2001 + 1) 14 | GROUP BY 15 | c_customer_id, c_first_name, c_last_name, d_year 16 | UNION ALL 17 | SELECT 18 | c_customer_id customer_id, 19 | c_first_name customer_first_name, 20 | c_last_name customer_last_name, 21 | d_year AS year, 22 | sum(ws_net_paid) year_total, 23 | 'w' sale_type 24 | FROM 25 | customer, web_sales, date_dim 26 | WHERE c_customer_sk = ws_bill_customer_sk 27 | AND ws_sold_date_sk = d_date_sk 28 | AND d_year IN (2001, 2001 + 1) 29 | GROUP BY 30 | c_customer_id, c_first_name, c_last_name, d_year) 31 | SELECT 32 | t_s_secyear.customer_id, 33 | t_s_secyear.customer_first_name, 34 | t_s_secyear.customer_last_name 35 | FROM 36 | year_total t_s_firstyear, year_total t_s_secyear, 37 | year_total t_w_firstyear, year_total t_w_secyear 38 | WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id 39 | AND t_s_firstyear.customer_id = t_w_secyear.customer_id 40 | AND t_s_firstyear.customer_id = t_w_firstyear.customer_id 41 | AND t_s_firstyear.sale_type = 's' 42 | AND t_w_firstyear.sale_type = 'w' 43 | AND t_s_secyear.sale_type = 's' 44 | AND t_w_secyear.sale_type = 'w' 45 | AND t_s_firstyear.year = 2001 46 | AND t_s_secyear.year = 2001 + 1 47 | AND t_w_firstyear.year = 2001 48 | AND t_w_secyear.year = 2001 + 1 49 | AND t_s_firstyear.year_total > 0 50 | AND t_w_firstyear.year_total > 0 51 | AND CASE WHEN t_w_firstyear.year_total > 0 52 | THEN t_w_secyear.year_total / t_w_firstyear.year_total 53 | ELSE NULL END 54 | > CASE WHEN t_s_firstyear.year_total > 0 55 | THEN t_s_secyear.year_total / t_s_firstyear.year_total 56 | ELSE NULL END 57 | ORDER BY 1, 1, 1 58 | LIMIT 100 59 | -------------------------------------------------------------------------------- 
/src/test/resources/tpcds-flow-tests/inputs/q75.sql: -------------------------------------------------------------------------------- 1 | WITH all_sales AS ( 2 | SELECT 3 | d_year, 4 | i_brand_id, 5 | i_class_id, 6 | i_category_id, 7 | i_manufact_id, 8 | SUM(sales_cnt) AS sales_cnt, 9 | SUM(sales_amt) AS sales_amt 10 | FROM ( 11 | SELECT 12 | d_year, 13 | i_brand_id, 14 | i_class_id, 15 | i_category_id, 16 | i_manufact_id, 17 | cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt, 18 | cs_ext_sales_price - COALESCE(cr_return_amount, 0.0) AS sales_amt 19 | FROM catalog_sales 20 | JOIN item ON i_item_sk = cs_item_sk 21 | JOIN date_dim ON d_date_sk = cs_sold_date_sk 22 | LEFT JOIN catalog_returns ON (cs_order_number = cr_order_number 23 | AND cs_item_sk = cr_item_sk) 24 | WHERE i_category = 'Books' 25 | UNION 26 | SELECT 27 | d_year, 28 | i_brand_id, 29 | i_class_id, 30 | i_category_id, 31 | i_manufact_id, 32 | ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt, 33 | ss_ext_sales_price - COALESCE(sr_return_amt, 0.0) AS sales_amt 34 | FROM store_sales 35 | JOIN item ON i_item_sk = ss_item_sk 36 | JOIN date_dim ON d_date_sk = ss_sold_date_sk 37 | LEFT JOIN store_returns ON (ss_ticket_number = sr_ticket_number 38 | AND ss_item_sk = sr_item_sk) 39 | WHERE i_category = 'Books' 40 | UNION 41 | SELECT 42 | d_year, 43 | i_brand_id, 44 | i_class_id, 45 | i_category_id, 46 | i_manufact_id, 47 | ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt, 48 | ws_ext_sales_price - COALESCE(wr_return_amt, 0.0) AS sales_amt 49 | FROM web_sales 50 | JOIN item ON i_item_sk = ws_item_sk 51 | JOIN date_dim ON d_date_sk = ws_sold_date_sk 52 | LEFT JOIN web_returns ON (ws_order_number = wr_order_number 53 | AND ws_item_sk = wr_item_sk) 54 | WHERE i_category = 'Books') sales_detail 55 | GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) 56 | SELECT 57 | prev_yr.d_year AS prev_year, 58 | curr_yr.d_year AS year, 59 | curr_yr.i_brand_id, 60 | 
curr_yr.i_class_id, 61 | curr_yr.i_category_id, 62 | curr_yr.i_manufact_id, 63 | prev_yr.sales_cnt AS prev_yr_cnt, 64 | curr_yr.sales_cnt AS curr_yr_cnt, 65 | curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff, 66 | curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff 67 | FROM all_sales curr_yr, all_sales prev_yr 68 | WHERE curr_yr.i_brand_id = prev_yr.i_brand_id 69 | AND curr_yr.i_class_id = prev_yr.i_class_id 70 | AND curr_yr.i_category_id = prev_yr.i_category_id 71 | AND curr_yr.i_manufact_id = prev_yr.i_manufact_id 72 | AND curr_yr.d_year = 2002 73 | AND prev_yr.d_year = 2002 - 1 74 | AND CAST(curr_yr.sales_cnt AS DECIMAL(17, 2)) / CAST(prev_yr.sales_cnt AS DECIMAL(17, 2)) < 0.9 75 | ORDER BY sales_cnt_diff 76 | LIMIT 100 77 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q76.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | channel, 3 | col_name, 4 | d_year, 5 | d_qoy, 6 | i_category, 7 | COUNT(*) sales_cnt, 8 | SUM(ext_sales_price) sales_amt 9 | FROM ( 10 | SELECT 11 | 'store' AS channel, 12 | ss_store_sk col_name, 13 | d_year, 14 | d_qoy, 15 | i_category, 16 | ss_ext_sales_price ext_sales_price 17 | FROM store_sales, item, date_dim 18 | WHERE ss_store_sk IS NULL 19 | AND ss_sold_date_sk = d_date_sk 20 | AND ss_item_sk = i_item_sk 21 | UNION ALL 22 | SELECT 23 | 'web' AS channel, 24 | ws_ship_customer_sk col_name, 25 | d_year, 26 | d_qoy, 27 | i_category, 28 | ws_ext_sales_price ext_sales_price 29 | FROM web_sales, item, date_dim 30 | WHERE ws_ship_customer_sk IS NULL 31 | AND ws_sold_date_sk = d_date_sk 32 | AND ws_item_sk = i_item_sk 33 | UNION ALL 34 | SELECT 35 | 'catalog' AS channel, 36 | cs_ship_addr_sk col_name, 37 | d_year, 38 | d_qoy, 39 | i_category, 40 | cs_ext_sales_price ext_sales_price 41 | FROM catalog_sales, item, date_dim 42 | WHERE cs_ship_addr_sk IS NULL 43 | AND cs_sold_date_sk = d_date_sk 44 | AND 
cs_item_sk = i_item_sk) foo 45 | GROUP BY channel, col_name, d_year, d_qoy, i_category 46 | ORDER BY channel, col_name, d_year, d_qoy, i_category 47 | LIMIT 100 48 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q77.sql: -------------------------------------------------------------------------------- 1 | WITH ss AS 2 | (SELECT 3 | s_store_sk, 4 | sum(ss_ext_sales_price) AS sales, 5 | sum(ss_net_profit) AS profit 6 | FROM store_sales, date_dim, store 7 | WHERE ss_sold_date_sk = d_date_sk 8 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 9 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) 10 | AND ss_store_sk = s_store_sk 11 | GROUP BY s_store_sk), 12 | sr AS 13 | (SELECT 14 | s_store_sk, 15 | sum(sr_return_amt) AS returns, 16 | sum(sr_net_loss) AS profit_loss 17 | FROM store_returns, date_dim, store 18 | WHERE sr_returned_date_sk = d_date_sk 19 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 20 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) 21 | AND sr_store_sk = s_store_sk 22 | GROUP BY s_store_sk), 23 | cs AS 24 | (SELECT 25 | cs_call_center_sk, 26 | sum(cs_ext_sales_price) AS sales, 27 | sum(cs_net_profit) AS profit 28 | FROM catalog_sales, date_dim 29 | WHERE cs_sold_date_sk = d_date_sk 30 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 31 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) 32 | GROUP BY cs_call_center_sk), 33 | cr AS 34 | (SELECT 35 | sum(cr_return_amount) AS returns, 36 | sum(cr_net_loss) AS profit_loss 37 | FROM catalog_returns, date_dim 38 | WHERE cr_returned_date_sk = d_date_sk 39 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 40 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days)), 41 | ws AS 42 | (SELECT 43 | wp_web_page_sk, 44 | sum(ws_ext_sales_price) AS sales, 45 | sum(ws_net_profit) AS profit 46 | FROM web_sales, date_dim, web_page 47 | WHERE ws_sold_date_sk = d_date_sk 48 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 49 | 
(cast('2000-08-03' AS DATE) + INTERVAL 30 days) 50 | AND ws_web_page_sk = wp_web_page_sk 51 | GROUP BY wp_web_page_sk), 52 | wr AS 53 | (SELECT 54 | wp_web_page_sk, 55 | sum(wr_return_amt) AS returns, 56 | sum(wr_net_loss) AS profit_loss 57 | FROM web_returns, date_dim, web_page 58 | WHERE wr_returned_date_sk = d_date_sk 59 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 60 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) 61 | AND wr_web_page_sk = wp_web_page_sk 62 | GROUP BY wp_web_page_sk) 63 | SELECT 64 | channel, 65 | id, 66 | sum(sales) AS sales, 67 | sum(returns) AS returns, 68 | sum(profit) AS profit 69 | FROM 70 | (SELECT 71 | 'store channel' AS channel, 72 | ss.s_store_sk AS id, 73 | sales, 74 | coalesce(returns, 0) AS returns, 75 | (profit - coalesce(profit_loss, 0)) AS profit 76 | FROM ss 77 | LEFT JOIN sr 78 | ON ss.s_store_sk = sr.s_store_sk 79 | UNION ALL 80 | SELECT 81 | 'catalog channel' AS channel, 82 | cs_call_center_sk AS id, 83 | sales, 84 | returns, 85 | (profit - profit_loss) AS profit 86 | FROM cs, cr 87 | UNION ALL 88 | SELECT 89 | 'web channel' AS channel, 90 | ws.wp_web_page_sk AS id, 91 | sales, 92 | coalesce(returns, 0) returns, 93 | (profit - coalesce(profit_loss, 0)) AS profit 94 | FROM ws 95 | LEFT JOIN wr 96 | ON ws.wp_web_page_sk = wr.wp_web_page_sk 97 | ) x 98 | GROUP BY ROLLUP (channel, id) 99 | ORDER BY channel, id 100 | LIMIT 100 101 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q78.sql: -------------------------------------------------------------------------------- 1 | WITH ws AS 2 | (SELECT 3 | d_year AS ws_sold_year, 4 | ws_item_sk, 5 | ws_bill_customer_sk ws_customer_sk, 6 | sum(ws_quantity) ws_qty, 7 | sum(ws_wholesale_cost) ws_wc, 8 | sum(ws_sales_price) ws_sp 9 | FROM web_sales 10 | LEFT JOIN web_returns ON wr_order_number = ws_order_number AND ws_item_sk = wr_item_sk 11 | JOIN date_dim ON ws_sold_date_sk = d_date_sk 12 | WHERE 
wr_order_number IS NULL 13 | GROUP BY d_year, ws_item_sk, ws_bill_customer_sk 14 | ), 15 | cs AS 16 | (SELECT 17 | d_year AS cs_sold_year, 18 | cs_item_sk, 19 | cs_bill_customer_sk cs_customer_sk, 20 | sum(cs_quantity) cs_qty, 21 | sum(cs_wholesale_cost) cs_wc, 22 | sum(cs_sales_price) cs_sp 23 | FROM catalog_sales 24 | LEFT JOIN catalog_returns ON cr_order_number = cs_order_number AND cs_item_sk = cr_item_sk 25 | JOIN date_dim ON cs_sold_date_sk = d_date_sk 26 | WHERE cr_order_number IS NULL 27 | GROUP BY d_year, cs_item_sk, cs_bill_customer_sk 28 | ), 29 | ss AS 30 | (SELECT 31 | d_year AS ss_sold_year, 32 | ss_item_sk, 33 | ss_customer_sk, 34 | sum(ss_quantity) ss_qty, 35 | sum(ss_wholesale_cost) ss_wc, 36 | sum(ss_sales_price) ss_sp 37 | FROM store_sales 38 | LEFT JOIN store_returns ON sr_ticket_number = ss_ticket_number AND ss_item_sk = sr_item_sk 39 | JOIN date_dim ON ss_sold_date_sk = d_date_sk 40 | WHERE sr_ticket_number IS NULL 41 | GROUP BY d_year, ss_item_sk, ss_customer_sk 42 | ) 43 | SELECT 44 | round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) ratio, 45 | ss_qty store_qty, 46 | ss_wc store_wholesale_cost, 47 | ss_sp store_sales_price, 48 | coalesce(ws_qty, 0) + coalesce(cs_qty, 0) other_chan_qty, 49 | coalesce(ws_wc, 0) + coalesce(cs_wc, 0) other_chan_wholesale_cost, 50 | coalesce(ws_sp, 0) + coalesce(cs_sp, 0) other_chan_sales_price 51 | FROM ss 52 | LEFT JOIN ws 53 | ON (ws_sold_year = ss_sold_year AND ws_item_sk = ss_item_sk AND ws_customer_sk = ss_customer_sk) 54 | LEFT JOIN cs 55 | ON (cs_sold_year = ss_sold_year AND cs_item_sk = ss_item_sk AND cs_customer_sk = ss_customer_sk) 56 | WHERE coalesce(ws_qty, 0) > 0 AND coalesce(cs_qty, 0) > 0 AND ss_sold_year = 2000 57 | ORDER BY 58 | ratio, 59 | ss_qty DESC, ss_wc DESC, ss_sp DESC, 60 | other_chan_qty, 61 | other_chan_wholesale_cost, 62 | other_chan_sales_price, 63 | round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) 64 | LIMIT 100 65 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q79.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | substr(s_city, 1, 30), 5 | ss_ticket_number, 6 | amt, 7 | profit 8 | FROM 9 | (SELECT 10 | ss_ticket_number, 11 | ss_customer_sk, 12 | store.s_city, 13 | sum(ss_coupon_amt) amt, 14 | sum(ss_net_profit) profit 15 | FROM store_sales, date_dim, store, household_demographics 16 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 17 | AND store_sales.ss_store_sk = store.s_store_sk 18 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 19 | AND (household_demographics.hd_dep_count = 6 OR 20 | household_demographics.hd_vehicle_count > 2) 21 | AND date_dim.d_dow = 1 22 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 23 | AND store.s_number_employees BETWEEN 200 AND 295 24 | GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, store.s_city) ms, customer 25 | WHERE ss_customer_sk = c_customer_sk 26 | ORDER BY c_last_name, c_first_name, substr(s_city, 1, 30), profit 27 | LIMIT 100 28 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q80.sql: -------------------------------------------------------------------------------- 1 | WITH ssr AS 2 | (SELECT 3 | s_store_id AS store_id, 4 | sum(ss_ext_sales_price) AS sales, 5 | sum(coalesce(sr_return_amt, 0)) AS returns, 6 | sum(ss_net_profit - coalesce(sr_net_loss, 0)) AS profit 7 | FROM store_sales 8 | LEFT OUTER JOIN store_returns ON 9 | (ss_item_sk = sr_item_sk AND 10 | ss_ticket_number = sr_ticket_number) 11 | , 12 | date_dim, store, item, promotion 13 | WHERE ss_sold_date_sk = d_date_sk 14 | AND d_date BETWEEN cast('2000-08-23' AS DATE) 15 | AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) 16 | AND ss_store_sk = s_store_sk 17 | AND ss_item_sk = i_item_sk 18 | AND 
i_current_price > 50 19 | AND ss_promo_sk = p_promo_sk 20 | AND p_channel_tv = 'N' 21 | GROUP BY s_store_id), 22 | csr AS 23 | (SELECT 24 | cp_catalog_page_id AS catalog_page_id, 25 | sum(cs_ext_sales_price) AS sales, 26 | sum(coalesce(cr_return_amount, 0)) AS returns, 27 | sum(cs_net_profit - coalesce(cr_net_loss, 0)) AS profit 28 | FROM catalog_sales 29 | LEFT OUTER JOIN catalog_returns ON 30 | (cs_item_sk = cr_item_sk AND 31 | cs_order_number = cr_order_number) 32 | , 33 | date_dim, catalog_page, item, promotion 34 | WHERE cs_sold_date_sk = d_date_sk 35 | AND d_date BETWEEN cast('2000-08-23' AS DATE) 36 | AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) 37 | AND cs_catalog_page_sk = cp_catalog_page_sk 38 | AND cs_item_sk = i_item_sk 39 | AND i_current_price > 50 40 | AND cs_promo_sk = p_promo_sk 41 | AND p_channel_tv = 'N' 42 | GROUP BY cp_catalog_page_id), 43 | wsr AS 44 | (SELECT 45 | web_site_id, 46 | sum(ws_ext_sales_price) AS sales, 47 | sum(coalesce(wr_return_amt, 0)) AS returns, 48 | sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit 49 | FROM web_sales 50 | LEFT OUTER JOIN web_returns ON 51 | (ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number) 52 | , 53 | date_dim, web_site, item, promotion 54 | WHERE ws_sold_date_sk = d_date_sk 55 | AND d_date BETWEEN cast('2000-08-23' AS DATE) 56 | AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) 57 | AND ws_web_site_sk = web_site_sk 58 | AND ws_item_sk = i_item_sk 59 | AND i_current_price > 50 60 | AND ws_promo_sk = p_promo_sk 61 | AND p_channel_tv = 'N' 62 | GROUP BY web_site_id) 63 | SELECT 64 | channel, 65 | id, 66 | sum(sales) AS sales, 67 | sum(returns) AS returns, 68 | sum(profit) AS profit 69 | FROM (SELECT 70 | 'store channel' AS channel, 71 | concat('store', store_id) AS id, 72 | sales, 73 | returns, 74 | profit 75 | FROM ssr 76 | UNION ALL 77 | SELECT 78 | 'catalog channel' AS channel, 79 | concat('catalog_page', catalog_page_id) AS id, 80 | sales, 81 | returns, 82 | profit 83 | FROM 
csr 84 | UNION ALL 85 | SELECT 86 | 'web channel' AS channel, 87 | concat('web_site', web_site_id) AS id, 88 | sales, 89 | returns, 90 | profit 91 | FROM wsr) x 92 | GROUP BY ROLLUP (channel, id) 93 | ORDER BY channel, id 94 | LIMIT 100 95 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q81.sql: -------------------------------------------------------------------------------- 1 | WITH customer_total_return AS 2 | (SELECT 3 | cr_returning_customer_sk AS ctr_customer_sk, 4 | ca_state AS ctr_state, 5 | sum(cr_return_amt_inc_tax) AS ctr_total_return 6 | FROM catalog_returns, date_dim, customer_address 7 | WHERE cr_returned_date_sk = d_date_sk 8 | AND d_year = 2000 9 | AND cr_returning_addr_sk = ca_address_sk 10 | GROUP BY cr_returning_customer_sk, ca_state ) 11 | SELECT 12 | c_customer_id, 13 | c_salutation, 14 | c_first_name, 15 | c_last_name, 16 | ca_street_number, 17 | ca_street_name, 18 | ca_street_type, 19 | ca_suite_number, 20 | ca_city, 21 | ca_county, 22 | ca_state, 23 | ca_zip, 24 | ca_country, 25 | ca_gmt_offset, 26 | ca_location_type, 27 | ctr_total_return 28 | FROM customer_total_return ctr1, customer_address, customer 29 | WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 30 | FROM customer_total_return ctr2 31 | WHERE ctr1.ctr_state = ctr2.ctr_state) 32 | AND ca_address_sk = c_current_addr_sk 33 | AND ca_state = 'GA' 34 | AND ctr1.ctr_customer_sk = c_customer_sk 35 | ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name 36 | , ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset 37 | , ca_location_type, ctr_total_return 38 | LIMIT 100 39 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q82.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 
3 | i_item_desc, 4 | i_current_price 5 | FROM item, inventory, date_dim, store_sales 6 | WHERE i_current_price BETWEEN 62 AND 62 + 30 7 | AND inv_item_sk = i_item_sk 8 | AND d_date_sk = inv_date_sk 9 | AND d_date BETWEEN cast('2000-05-25' AS DATE) AND (cast('2000-05-25' AS DATE) + INTERVAL 60 days) 10 | AND i_manufact_id IN (129, 270, 821, 423) 11 | AND inv_quantity_on_hand BETWEEN 100 AND 500 12 | AND ss_item_sk = i_item_sk 13 | GROUP BY i_item_id, i_item_desc, i_current_price 14 | ORDER BY i_item_id 15 | LIMIT 100 16 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q83.sql: -------------------------------------------------------------------------------- 1 | WITH sr_items AS 2 | (SELECT 3 | i_item_id item_id, 4 | sum(sr_return_quantity) sr_item_qty 5 | FROM store_returns, item, date_dim 6 | WHERE sr_item_sk = i_item_sk 7 | AND d_date IN (SELECT d_date 8 | FROM date_dim 9 | WHERE d_week_seq IN 10 | (SELECT d_week_seq 11 | FROM date_dim 12 | WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) 13 | AND sr_returned_date_sk = d_date_sk 14 | GROUP BY i_item_id), 15 | cr_items AS 16 | (SELECT 17 | i_item_id item_id, 18 | sum(cr_return_quantity) cr_item_qty 19 | FROM catalog_returns, item, date_dim 20 | WHERE cr_item_sk = i_item_sk 21 | AND d_date IN (SELECT d_date 22 | FROM date_dim 23 | WHERE d_week_seq IN 24 | (SELECT d_week_seq 25 | FROM date_dim 26 | WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) 27 | AND cr_returned_date_sk = d_date_sk 28 | GROUP BY i_item_id), 29 | wr_items AS 30 | (SELECT 31 | i_item_id item_id, 32 | sum(wr_return_quantity) wr_item_qty 33 | FROM web_returns, item, date_dim 34 | WHERE wr_item_sk = i_item_sk AND d_date IN 35 | (SELECT d_date 36 | FROM date_dim 37 | WHERE d_week_seq IN 38 | (SELECT d_week_seq 39 | FROM date_dim 40 | WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) 41 | AND wr_returned_date_sk = d_date_sk 42 | GROUP BY 
i_item_id) 43 | SELECT 44 | sr_items.item_id, 45 | sr_item_qty, 46 | sr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 sr_dev, 47 | cr_item_qty, 48 | cr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 cr_dev, 49 | wr_item_qty, 50 | wr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 wr_dev, 51 | (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 average 52 | FROM sr_items, cr_items, wr_items 53 | WHERE sr_items.item_id = cr_items.item_id 54 | AND sr_items.item_id = wr_items.item_id 55 | ORDER BY sr_items.item_id, sr_item_qty 56 | LIMIT 100 57 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q84.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_customer_id AS customer_id, 3 | concat(c_last_name, ', ', c_first_name) AS customername 4 | FROM customer 5 | , customer_address 6 | , customer_demographics 7 | , household_demographics 8 | , income_band 9 | , store_returns 10 | WHERE ca_city = 'Edgewood' 11 | AND c_current_addr_sk = ca_address_sk 12 | AND ib_lower_bound >= 38128 13 | AND ib_upper_bound <= 38128 + 50000 14 | AND ib_income_band_sk = hd_income_band_sk 15 | AND cd_demo_sk = c_current_cdemo_sk 16 | AND hd_demo_sk = c_current_hdemo_sk 17 | AND sr_cdemo_sk = cd_demo_sk 18 | ORDER BY c_customer_id 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q85.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | substr(r_reason_desc, 1, 20), 3 | avg(ws_quantity), 4 | avg(wr_refunded_cash), 5 | avg(wr_fee) 6 | FROM web_sales, web_returns, web_page, customer_demographics cd1, 7 | customer_demographics cd2, customer_address, date_dim, reason 8 | WHERE ws_web_page_sk = wp_web_page_sk 9 | AND ws_item_sk = wr_item_sk 10 | AND ws_order_number = wr_order_number 11 | 
AND ws_sold_date_sk = d_date_sk AND d_year = 2000 12 | AND cd1.cd_demo_sk = wr_refunded_cdemo_sk 13 | AND cd2.cd_demo_sk = wr_returning_cdemo_sk 14 | AND ca_address_sk = wr_refunded_addr_sk 15 | AND r_reason_sk = wr_reason_sk 16 | AND 17 | ( 18 | ( 19 | cd1.cd_marital_status = 'M' 20 | AND 21 | cd1.cd_marital_status = cd2.cd_marital_status 22 | AND 23 | cd1.cd_education_status = 'Advanced Degree' 24 | AND 25 | cd1.cd_education_status = cd2.cd_education_status 26 | AND 27 | ws_sales_price BETWEEN 100.00 AND 150.00 28 | ) 29 | OR 30 | ( 31 | cd1.cd_marital_status = 'S' 32 | AND 33 | cd1.cd_marital_status = cd2.cd_marital_status 34 | AND 35 | cd1.cd_education_status = 'College' 36 | AND 37 | cd1.cd_education_status = cd2.cd_education_status 38 | AND 39 | ws_sales_price BETWEEN 50.00 AND 100.00 40 | ) 41 | OR 42 | ( 43 | cd1.cd_marital_status = 'W' 44 | AND 45 | cd1.cd_marital_status = cd2.cd_marital_status 46 | AND 47 | cd1.cd_education_status = '2 yr Degree' 48 | AND 49 | cd1.cd_education_status = cd2.cd_education_status 50 | AND 51 | ws_sales_price BETWEEN 150.00 AND 200.00 52 | ) 53 | ) 54 | AND 55 | ( 56 | ( 57 | ca_country = 'United States' 58 | AND 59 | ca_state IN ('IN', 'OH', 'NJ') 60 | AND ws_net_profit BETWEEN 100 AND 200 61 | ) 62 | OR 63 | ( 64 | ca_country = 'United States' 65 | AND 66 | ca_state IN ('WI', 'CT', 'KY') 67 | AND ws_net_profit BETWEEN 150 AND 300 68 | ) 69 | OR 70 | ( 71 | ca_country = 'United States' 72 | AND 73 | ca_state IN ('LA', 'IA', 'AR') 74 | AND ws_net_profit BETWEEN 50 AND 250 75 | ) 76 | ) 77 | GROUP BY r_reason_desc 78 | ORDER BY substr(r_reason_desc, 1, 20) 79 | , avg(ws_quantity) 80 | , avg(wr_refunded_cash) 81 | , avg(wr_fee) 82 | LIMIT 100 83 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q86.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sum(ws_net_paid) AS total_sum, 3 | i_category, 4 | 
i_class, 5 | grouping(i_category) + grouping(i_class) AS lochierarchy, 6 | rank() 7 | OVER ( 8 | PARTITION BY grouping(i_category) + grouping(i_class), 9 | CASE WHEN grouping(i_class) = 0 10 | THEN i_category END 11 | ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent 12 | FROM 13 | web_sales, date_dim d1, item 14 | WHERE 15 | d1.d_month_seq BETWEEN 1200 AND 1200 + 11 16 | AND d1.d_date_sk = ws_sold_date_sk 17 | AND i_item_sk = ws_item_sk 18 | GROUP BY ROLLUP (i_category, i_class) 19 | ORDER BY 20 | lochierarchy DESC, 21 | CASE WHEN lochierarchy = 0 22 | THEN i_category END, 23 | rank_within_parent 24 | LIMIT 100 25 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q87.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) 2 | FROM ((SELECT DISTINCT 3 | c_last_name, 4 | c_first_name, 5 | d_date 6 | FROM store_sales, date_dim, customer 7 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 8 | AND store_sales.ss_customer_sk = customer.c_customer_sk 9 | AND d_month_seq BETWEEN 1200 AND 1200 + 11) 10 | EXCEPT 11 | (SELECT DISTINCT 12 | c_last_name, 13 | c_first_name, 14 | d_date 15 | FROM catalog_sales, date_dim, customer 16 | WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 17 | AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 18 | AND d_month_seq BETWEEN 1200 AND 1200 + 11) 19 | EXCEPT 20 | (SELECT DISTINCT 21 | c_last_name, 22 | c_first_name, 23 | d_date 24 | FROM web_sales, date_dim, customer 25 | WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk 26 | AND web_sales.ws_bill_customer_sk = customer.c_customer_sk 27 | AND d_month_seq BETWEEN 1200 AND 1200 + 11) 28 | ) cool_cust 29 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q89.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | 
FROM ( 3 | SELECT 4 | i_category, 5 | i_class, 6 | i_brand, 7 | s_store_name, 8 | s_company_name, 9 | d_moy, 10 | sum(ss_sales_price) sum_sales, 11 | avg(sum(ss_sales_price)) 12 | OVER 13 | (PARTITION BY i_category, i_brand, s_store_name, s_company_name) 14 | avg_monthly_sales 15 | FROM item, store_sales, date_dim, store 16 | WHERE ss_item_sk = i_item_sk AND 17 | ss_sold_date_sk = d_date_sk AND 18 | ss_store_sk = s_store_sk AND 19 | d_year IN (1999) AND 20 | ((i_category IN ('Books', 'Electronics', 'Sports') AND 21 | i_class IN ('computers', 'stereo', 'football')) 22 | OR (i_category IN ('Men', 'Jewelry', 'Women') AND 23 | i_class IN ('shirts', 'birdal', 'dresses'))) 24 | GROUP BY i_category, i_class, i_brand, 25 | s_store_name, s_company_name, d_moy) tmp1 26 | WHERE CASE WHEN (avg_monthly_sales <> 0) 27 | THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) 28 | ELSE NULL END > 0.1 29 | ORDER BY sum_sales - avg_monthly_sales, s_store_name 30 | LIMIT 100 31 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q9.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | CASE WHEN (SELECT count(*) 3 | FROM store_sales 4 | WHERE ss_quantity BETWEEN 1 AND 20) > 62316685 5 | THEN (SELECT avg(ss_ext_discount_amt) 6 | FROM store_sales 7 | WHERE ss_quantity BETWEEN 1 AND 20) 8 | ELSE (SELECT avg(ss_net_paid) 9 | FROM store_sales 10 | WHERE ss_quantity BETWEEN 1 AND 20) END bucket1, 11 | CASE WHEN (SELECT count(*) 12 | FROM store_sales 13 | WHERE ss_quantity BETWEEN 21 AND 40) > 19045798 14 | THEN (SELECT avg(ss_ext_discount_amt) 15 | FROM store_sales 16 | WHERE ss_quantity BETWEEN 21 AND 40) 17 | ELSE (SELECT avg(ss_net_paid) 18 | FROM store_sales 19 | WHERE ss_quantity BETWEEN 21 AND 40) END bucket2, 20 | CASE WHEN (SELECT count(*) 21 | FROM store_sales 22 | WHERE ss_quantity BETWEEN 41 AND 60) > 365541424 23 | THEN (SELECT avg(ss_ext_discount_amt) 
24 | FROM store_sales 25 | WHERE ss_quantity BETWEEN 41 AND 60) 26 | ELSE (SELECT avg(ss_net_paid) 27 | FROM store_sales 28 | WHERE ss_quantity BETWEEN 41 AND 60) END bucket3, 29 | CASE WHEN (SELECT count(*) 30 | FROM store_sales 31 | WHERE ss_quantity BETWEEN 61 AND 80) > 216357808 32 | THEN (SELECT avg(ss_ext_discount_amt) 33 | FROM store_sales 34 | WHERE ss_quantity BETWEEN 61 AND 80) 35 | ELSE (SELECT avg(ss_net_paid) 36 | FROM store_sales 37 | WHERE ss_quantity BETWEEN 61 AND 80) END bucket4, 38 | CASE WHEN (SELECT count(*) 39 | FROM store_sales 40 | WHERE ss_quantity BETWEEN 81 AND 100) > 184483884 41 | THEN (SELECT avg(ss_ext_discount_amt) 42 | FROM store_sales 43 | WHERE ss_quantity BETWEEN 81 AND 100) 44 | ELSE (SELECT avg(ss_net_paid) 45 | FROM store_sales 46 | WHERE ss_quantity BETWEEN 81 AND 100) END bucket5 47 | FROM reason 48 | WHERE r_reason_sk = 1 49 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q90.sql: -------------------------------------------------------------------------------- 1 | SELECT cast(amc AS DECIMAL(15, 4)) / cast(pmc AS DECIMAL(15, 4)) am_pm_ratio 2 | FROM (SELECT count(*) amc 3 | FROM web_sales, household_demographics, time_dim, web_page 4 | WHERE ws_sold_time_sk = time_dim.t_time_sk 5 | AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk 6 | AND ws_web_page_sk = web_page.wp_web_page_sk 7 | AND time_dim.t_hour BETWEEN 8 AND 8 + 1 8 | AND household_demographics.hd_dep_count = 6 9 | AND web_page.wp_char_count BETWEEN 5000 AND 5200) at, 10 | (SELECT count(*) pmc 11 | FROM web_sales, household_demographics, time_dim, web_page 12 | WHERE ws_sold_time_sk = time_dim.t_time_sk 13 | AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk 14 | AND ws_web_page_sk = web_page.wp_web_page_sk 15 | AND time_dim.t_hour BETWEEN 19 AND 19 + 1 16 | AND household_demographics.hd_dep_count = 6 17 | AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt 18 | ORDER BY 
am_pm_ratio 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q91.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cc_call_center_id Call_Center, 3 | cc_name Call_Center_Name, 4 | cc_manager Manager, 5 | sum(cr_net_loss) Returns_Loss 6 | FROM 7 | call_center, catalog_returns, date_dim, customer, customer_address, 8 | customer_demographics, household_demographics 9 | WHERE 10 | cr_call_center_sk = cc_call_center_sk 11 | AND cr_returned_date_sk = d_date_sk 12 | AND cr_returning_customer_sk = c_customer_sk 13 | AND cd_demo_sk = c_current_cdemo_sk 14 | AND hd_demo_sk = c_current_hdemo_sk 15 | AND ca_address_sk = c_current_addr_sk 16 | AND d_year = 1998 17 | AND d_moy = 11 18 | AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown') 19 | OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree')) 20 | AND hd_buy_potential LIKE 'Unknown%' 21 | AND ca_gmt_offset = -7 22 | GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status 23 | ORDER BY sum(cr_net_loss) DESC 24 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q92.sql: -------------------------------------------------------------------------------- 1 | SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount ` 2 | FROM web_sales, item, date_dim 3 | WHERE i_manufact_id = 350 4 | AND i_item_sk = ws_item_sk 5 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) 6 | AND d_date_sk = ws_sold_date_sk 7 | AND ws_ext_discount_amt > 8 | ( 9 | SELECT 1.3 * avg(ws_ext_discount_amt) 10 | FROM web_sales, date_dim 11 | WHERE ws_item_sk = i_item_sk 12 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) 13 | AND d_date_sk = ws_sold_date_sk 14 | ) 15 | ORDER BY sum(ws_ext_discount_amt) 16 | 
LIMIT 100 17 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q93.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ss_customer_sk, 3 | sum(act_sales) sumsales 4 | FROM (SELECT 5 | ss_item_sk, 6 | ss_ticket_number, 7 | ss_customer_sk, 8 | CASE WHEN sr_return_quantity IS NOT NULL 9 | THEN (ss_quantity - sr_return_quantity) * ss_sales_price 10 | ELSE (ss_quantity * ss_sales_price) END act_sales 11 | FROM store_sales 12 | LEFT OUTER JOIN store_returns 13 | ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number) 14 | , 15 | reason 16 | WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t 17 | GROUP BY ss_customer_sk 18 | ORDER BY sumsales, ss_customer_sk 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q94.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | count(DISTINCT ws_order_number) AS `order count `, 3 | sum(ws_ext_ship_cost) AS `total shipping cost `, 4 | sum(ws_net_profit) AS `total net profit ` 5 | FROM 6 | web_sales ws1, date_dim, customer_address, web_site 7 | WHERE 8 | d_date BETWEEN '1999-02-01' AND 9 | (CAST('1999-02-01' AS DATE) + INTERVAL 60 days) 10 | AND ws1.ws_ship_date_sk = d_date_sk 11 | AND ws1.ws_ship_addr_sk = ca_address_sk 12 | AND ca_state = 'IL' 13 | AND ws1.ws_web_site_sk = web_site_sk 14 | AND web_company_name = 'pri' 15 | AND EXISTS(SELECT * 16 | FROM web_sales ws2 17 | WHERE ws1.ws_order_number = ws2.ws_order_number 18 | AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 19 | AND NOT EXISTS(SELECT * 20 | FROM web_returns wr1 21 | WHERE ws1.ws_order_number = wr1.wr_order_number) 22 | ORDER BY count(DISTINCT ws_order_number) 23 | LIMIT 100 24 | -------------------------------------------------------------------------------- 
/src/test/resources/tpcds-flow-tests/inputs/q95.sql: -------------------------------------------------------------------------------- 1 | WITH ws_wh AS 2 | (SELECT 3 | ws1.ws_order_number, 4 | ws1.ws_warehouse_sk wh1, 5 | ws2.ws_warehouse_sk wh2 6 | FROM web_sales ws1, web_sales ws2 7 | WHERE ws1.ws_order_number = ws2.ws_order_number 8 | AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 9 | SELECT 10 | count(DISTINCT ws_order_number) AS `order count `, 11 | sum(ws_ext_ship_cost) AS `total shipping cost `, 12 | sum(ws_net_profit) AS `total net profit ` 13 | FROM 14 | web_sales ws1, date_dim, customer_address, web_site 15 | WHERE 16 | d_date BETWEEN '1999-02-01' AND 17 | (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY) 18 | AND ws1.ws_ship_date_sk = d_date_sk 19 | AND ws1.ws_ship_addr_sk = ca_address_sk 20 | AND ca_state = 'IL' 21 | AND ws1.ws_web_site_sk = web_site_sk 22 | AND web_company_name = 'pri' 23 | AND ws1.ws_order_number IN (SELECT ws_order_number 24 | FROM ws_wh) 25 | AND ws1.ws_order_number IN (SELECT wr_order_number 26 | FROM web_returns, ws_wh 27 | WHERE wr_order_number = ws_wh.ws_order_number) 28 | ORDER BY count(DISTINCT ws_order_number) 29 | LIMIT 100 30 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q96.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) 2 | FROM store_sales, household_demographics, time_dim, store 3 | WHERE ss_sold_time_sk = time_dim.t_time_sk 4 | AND ss_hdemo_sk = household_demographics.hd_demo_sk 5 | AND ss_store_sk = s_store_sk 6 | AND time_dim.t_hour = 20 7 | AND time_dim.t_minute >= 30 8 | AND household_demographics.hd_dep_count = 7 9 | AND store.s_store_name = 'ese' 10 | ORDER BY count(*) 11 | LIMIT 100 12 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q97.sql: 
-------------------------------------------------------------------------------- 1 | WITH ssci AS ( 2 | SELECT 3 | ss_customer_sk customer_sk, 4 | ss_item_sk item_sk 5 | FROM store_sales, date_dim 6 | WHERE ss_sold_date_sk = d_date_sk 7 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 8 | GROUP BY ss_customer_sk, ss_item_sk), 9 | csci AS ( 10 | SELECT 11 | cs_bill_customer_sk customer_sk, 12 | cs_item_sk item_sk 13 | FROM catalog_sales, date_dim 14 | WHERE cs_sold_date_sk = d_date_sk 15 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 16 | GROUP BY cs_bill_customer_sk, cs_item_sk) 17 | SELECT 18 | sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL 19 | THEN 1 20 | ELSE 0 END) store_only, 21 | sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL 22 | THEN 1 23 | ELSE 0 END) catalog_only, 24 | sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL 25 | THEN 1 26 | ELSE 0 END) store_and_catalog 27 | FROM ssci 28 | FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk 29 | AND ssci.item_sk = csci.item_sk) 30 | LIMIT 100 31 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q98.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_desc, 3 | i_category, 4 | i_class, 5 | i_current_price, 6 | sum(ss_ext_sales_price) AS itemrevenue, 7 | sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) 8 | OVER 9 | (PARTITION BY i_class) AS revenueratio 10 | FROM 11 | store_sales, item, date_dim 12 | WHERE 13 | ss_item_sk = i_item_sk 14 | AND i_category IN ('Sports', 'Books', 'Home') 15 | AND ss_sold_date_sk = d_date_sk 16 | AND d_date BETWEEN cast('1999-02-22' AS DATE) 17 | AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) 18 | GROUP BY 19 | i_item_id, i_item_desc, i_category, i_class, i_current_price 20 | ORDER BY 21 | i_category, i_class, i_item_id, i_item_desc, revenueratio 22 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/inputs/q99.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | substr(w_warehouse_name, 1, 20), 3 | sm_type, 4 | cc_name, 5 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) 6 | THEN 1 7 | ELSE 0 END) AS `30 days `, 8 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND 9 | (cs_ship_date_sk - cs_sold_date_sk <= 60) 10 | THEN 1 11 | ELSE 0 END) AS `31 - 60 days `, 12 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND 13 | (cs_ship_date_sk - cs_sold_date_sk <= 90) 14 | THEN 1 15 | ELSE 0 END) AS `61 - 90 days `, 16 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND 17 | (cs_ship_date_sk - cs_sold_date_sk <= 120) 18 | THEN 1 19 | ELSE 0 END) AS `91 - 120 days `, 20 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120) 21 | THEN 1 22 | ELSE 0 END) AS `>120 days ` 23 | FROM 24 | catalog_sales, warehouse, ship_mode, call_center, date_dim 25 | WHERE 26 | d_month_seq BETWEEN 1200 AND 1200 + 11 27 | AND cs_ship_date_sk = d_date_sk 28 | AND cs_warehouse_sk = w_warehouse_sk 29 | AND cs_ship_mode_sk = sm_ship_mode_sk 30 | AND cs_call_center_sk = cc_call_center_sk 31 | GROUP BY 32 | substr(w_warehouse_name, 1, 20), sm_type, cc_name 33 | ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name 34 | LIMIT 100 35 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/results/q41-contracted.sql.dot: -------------------------------------------------------------------------------- 1 | // Automatically generated by TPCDSFlowTestSuite 2 | 3 | 4 | digraph { 5 | graph [pad="0.5" nodesep="0.5" ranksep="1" fontname="Helvetica" rankdir=LR]; 6 | node [shape=plaintext] 7 | 8 | 9 | "default.item" [color="black" label=< 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 
| 32 | 33 | 34 |
default.item
i_item_sk
i_item_id
i_rec_start_date
i_rec_end_date
i_item_desc
i_current_price
i_wholesale_cost
i_brand_id
i_brand
i_class_id
i_class
i_category_id
i_category
i_manufact_id
i_manufact
i_size
i_formulation
i_color
i_units
i_container
i_manager_id
i_product_name
>]; 35 | 36 | 37 | "v" [color="black" label=< 38 | 39 | 40 | 41 |
v
i_product_name
>]; 42 | 43 | "default.item":21 -> "v":0; 44 | } 45 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/results/q44-contracted.sql.dot: -------------------------------------------------------------------------------- 1 | // Automatically generated by TPCDSFlowTestSuite 2 | 3 | 4 | digraph { 5 | graph [pad="0.5" nodesep="0.5" ranksep="1" fontname="Helvetica" rankdir=LR]; 6 | node [shape=plaintext] 7 | 8 | 9 | "default.item" [color="black" label=< 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
default.item
i_item_sk
i_item_id
i_rec_start_date
i_rec_end_date
i_item_desc
i_current_price
i_wholesale_cost
i_brand_id
i_brand
i_class_id
i_class
i_category_id
i_category
i_manufact_id
i_manufact
i_size
i_formulation
i_color
i_units
i_container
i_manager_id
i_product_name
>]; 35 | 36 | 37 | "default.store_sales" [color="black" label=< 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
default.store_sales
ss_sold_date_sk
ss_sold_time_sk
ss_item_sk
ss_customer_sk
ss_cdemo_sk
ss_hdemo_sk
ss_addr_sk
ss_store_sk
ss_promo_sk
ss_ticket_number
ss_quantity
ss_wholesale_cost
ss_list_price
ss_sales_price
ss_ext_discount_amt
ss_ext_sales_price
ss_ext_wholesale_cost
ss_ext_list_price
ss_ext_tax
ss_coupon_amt
ss_net_paid
ss_net_paid_inc_tax
ss_net_profit
>]; 64 | 65 | 66 | "v" [color="black" label=< 67 | 68 | 69 | 70 | 71 | 72 |
v
rnk
best_performing
worst_performing
>]; 73 | 74 | "default.item":21 -> "v":1; 75 | "default.store_sales":2 -> "v":nodeName; 76 | "default.store_sales":22 -> "v":nodeName; 77 | "default.store_sales":22 -> "v":nodeName; 78 | "default.store_sales":22 -> "v":nodeName; 79 | } 80 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/results/q9-contracted.sql.dot: -------------------------------------------------------------------------------- 1 | // Automatically generated by TPCDSFlowTestSuite 2 | 3 | 4 | digraph { 5 | graph [pad="0.5" nodesep="0.5" ranksep="1" fontname="Helvetica" rankdir=LR]; 6 | node [shape=plaintext] 7 | 8 | 9 | "default.reason" [color="black" label=< 10 | 11 | 12 | 13 | 14 | 15 |
default.reason
r_reason_sk
r_reason_id
r_reason_desc
>]; 16 | 17 | 18 | "default.store_sales" [color="black" label=< 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 |
default.store_sales
ss_sold_date_sk
ss_sold_time_sk
ss_item_sk
ss_customer_sk
ss_cdemo_sk
ss_hdemo_sk
ss_addr_sk
ss_store_sk
ss_promo_sk
ss_ticket_number
ss_quantity
ss_wholesale_cost
ss_list_price
ss_sales_price
ss_ext_discount_amt
ss_ext_sales_price
ss_ext_wholesale_cost
ss_ext_list_price
ss_ext_tax
ss_coupon_amt
ss_net_paid
ss_net_paid_inc_tax
ss_net_profit
>]; 45 | 46 | 47 | "v" [color="black" label=< 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 |
v
bucket1
bucket2
bucket3
bucket4
bucket5
>]; 56 | 57 | "default.store_sales":14 -> "v":nodeName; 58 | "default.store_sales":14 -> "v":nodeName; 59 | "default.store_sales":14 -> "v":nodeName; 60 | "default.store_sales":14 -> "v":nodeName; 61 | "default.store_sales":14 -> "v":nodeName; 62 | "default.store_sales":20 -> "v":nodeName; 63 | "default.store_sales":20 -> "v":nodeName; 64 | "default.store_sales":20 -> "v":nodeName; 65 | "default.store_sales":20 -> "v":nodeName; 66 | "default.store_sales":20 -> "v":nodeName; 67 | } 68 | -------------------------------------------------------------------------------- /src/test/resources/tpcds-flow-tests/results/q93-contracted.sql.dot: -------------------------------------------------------------------------------- 1 | // Automatically generated by TPCDSFlowTestSuite 2 | 3 | 4 | digraph { 5 | graph [pad="0.5" nodesep="0.5" ranksep="1" fontname="Helvetica" rankdir=LR]; 6 | node [shape=plaintext] 7 | 8 | 9 | "default.reason" [color="black" label=< 10 | 11 | 12 | 13 | 14 | 15 |
default.reason
r_reason_sk
r_reason_id
r_reason_desc
>]; 16 | 17 | 18 | "default.store_returns" [color="black" label=< 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 |
default.store_returns
sr_returned_date_sk
sr_return_time_sk
sr_item_sk
sr_customer_sk
sr_cdemo_sk
sr_hdemo_sk
sr_addr_sk
sr_store_sk
sr_reason_sk
sr_ticket_number
sr_return_quantity
sr_return_amt
sr_return_tax
sr_return_amt_inc_tax
sr_fee
sr_return_ship_cost
sr_refunded_cash
sr_reversed_charge
sr_store_credit
sr_net_loss
>]; 42 | 43 | 44 | "default.store_sales" [color="black" label=< 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 |
default.store_sales
ss_sold_date_sk
ss_sold_time_sk
ss_item_sk
ss_customer_sk
ss_cdemo_sk
ss_hdemo_sk
ss_addr_sk
ss_store_sk
ss_promo_sk
ss_ticket_number
ss_quantity
ss_wholesale_cost
ss_list_price
ss_sales_price
ss_ext_discount_amt
ss_ext_sales_price
ss_ext_wholesale_cost
ss_ext_list_price
ss_ext_tax
ss_coupon_amt
ss_net_paid
ss_net_paid_inc_tax
ss_net_profit
>]; 71 | 72 | 73 | "v" [color="black" label=< 74 | 75 | 76 | 77 | 78 |
v
ss_customer_sk
sumsales
>]; 79 | 80 | "default.reason":0 -> "v":nodeName; 81 | "default.store_returns":10 -> "v":1; 82 | "default.store_sales":10 -> "v":1; 83 | "default.store_sales":13 -> "v":1; 84 | "default.store_sales":3 -> "v":0; 85 | } 86 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/flow/Neo4jAuraTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.spark.sql.flow 19 | 20 | import org.scalactic.source.Position 21 | import org.scalatest.{BeforeAndAfterEach, Tag} 22 | import org.scalatest.funsuite.AnyFunSuiteLike 23 | 24 | import org.apache.spark.sql.flow.sink.Neo4jAura 25 | 26 | trait Neo4jAuraTest extends Neo4jAura with AnyFunSuiteLike with BeforeAndAfterEach { 27 | 28 | val uri = System.getenv("NEO4J_AURADB_URI") 29 | val user = System.getenv("NEO4J_AURADB_USER") 30 | val passwd = System.getenv("NEO4J_AURADB_PASSWD") 31 | 32 | private lazy val runTests = { 33 | uri != null && user != null && passwd != null 34 | } 35 | 36 | override def beforeEach(): Unit = { 37 | super.beforeEach() 38 | if (runTests) { 39 | resetNeo4jDbState() 40 | } 41 | } 42 | 43 | protected override def test(testName: String, testTags: Tag*) 44 | (testFun: => Any)(implicit pos: Position): Unit = { 45 | super.test(testName) { 46 | assume(runTests) 47 | testFun 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/flow/SQLFlowTestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.flow 19 | 20 | import org.apache.spark.sql.SparkSession 21 | import org.apache.spark.sql.util.QueryExecutionListener 22 | 23 | trait SQLFlowTestUtils { 24 | 25 | protected def withListener(listener: QueryExecutionListener)(f: => Unit): Unit = { 26 | val spark = SparkSession.getActiveSession.getOrElse { 27 | throw new IllegalStateException("Active SparkSession not found") 28 | } 29 | try { 30 | spark.listenerManager.register(listener) 31 | f 32 | } finally { 33 | spark.listenerManager.unregister(listener) 34 | } 35 | } 36 | 37 | protected def getOutputAsString(f: => Unit): String = { 38 | val output = new java.io.ByteArrayOutputStream() 39 | Console.withOut(output) { f } 40 | output.toString 41 | } 42 | 43 | private def normalize(s: String): String = { 44 | s.replaceAll("_[a-z0-9]{7}", "_x") 45 | .replaceAll("_[0-9]{1,2}", "_x") 46 | } 47 | 48 | private def extractEdgesFrom(s: String, edgeRegex: String): Set[String] = { 49 | edgeRegex.r.findAllIn(normalize(s)).toList.toSet 50 | } 51 | 52 | protected def checkOutputString(edgeRegex: String)(actual: String, expected: String): Unit = { 53 | val expectedEdges = extractEdgesFrom(expected, edgeRegex) 54 | assert(expectedEdges.nonEmpty && extractEdgesFrom(actual, edgeRegex) == expectedEdges, 55 | s"`$actual` didn't match an expected string `$expected`") 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/flow/TPCDSFlowTestSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.flow 19 | 20 | import org.apache.spark.sql.SparkSession 21 | 22 | class TPCDSFlowTestSuite extends SQLFlowTestSuite with TPCDSTest { 23 | 24 | override protected def baseResourcePath = { 25 | getWorkspaceFilePath(tpcdsResourceFilePath.head, tpcdsResourceFilePath.tail: _*).toFile 26 | } 27 | 28 | override protected def ignoreList: Set[String] = Set( 29 | // TODO: Cannot generate a dot file for `q28.sql` 30 | "q28.sql" 31 | ) 32 | 33 | override protected def runQuery(query: String, session: SparkSession): Unit = { 34 | session.sql(query).createOrReplaceTempView("v") 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/flow/TPCDSTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.flow

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.test.SharedSparkSession

/**
 * Creates one parquet table per entry of `tableColumns` before the suite runs
 * and drops them again afterwards.
 *
 * Must be mixed into a [[SharedSparkSession]], which supplies `spark`.
 */
trait TPCDSTest extends TPCDSSchema with BeforeAndAfterAll {
  self: SharedSparkSession =>

  /** Path segments, relative to the workspace root, of the TPC-DS test resources. */
  protected def tpcdsResourceFilePath =
    Seq("src", "test", "resources", "tpcds-flow-tests")

  // NOTE(review): `tableColumns` is not defined here; presumably it comes from
  // `TPCDSSchema` and maps a table name to its column DDL -- confirm.
  private val tableNames: Iterable[String] = tableColumns.keys

  /** Creates `tableName` as a parquet table using the column list from `tableColumns`. */
  private def createTable(spark: SparkSession, tableName: String): Unit = {
    spark.sql(
      s"""
         |CREATE TABLE `$tableName` (${tableColumns(tableName)})
         |USING parquet
       """.stripMargin)
  }

  /** Lets the parent set up first, then creates every TPC-DS table. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    tableNames.foreach { tableName =>
      createTable(spark, tableName)
    }
  }

  /**
   * Drops every TPC-DS table and then runs the parent teardown.
   *
   * The parent `afterAll()` is invoked from `finally` so that a failure while
   * dropping a table cannot skip it.
   */
  override def afterAll(): Unit = {
    try {
      tableNames.foreach { tableName =>
        // Positional flags kept as in the original call; presumably
        // (ignoreIfNotExists, purge) -- confirm against SessionCatalog.dropTable.
        spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true)
      }
    } finally {
      super.afterAll()
    }
  }
}
--------------------------------------------------------------------------------