├── .gitignore ├── LICENSE ├── README.md ├── flink-kyuubi-tpcds ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── github │ │ └── deadwind4 │ │ └── benchmark │ │ └── flink │ │ └── kyuubi │ │ ├── Benchmark.java │ │ ├── QueryUtil.java │ │ └── Runner.java │ └── resources │ └── queries │ ├── q1.sql │ ├── q10.sql │ ├── q11.sql │ ├── q12.sql │ ├── q13.sql │ ├── q14a.sql │ ├── q14b.sql │ ├── q15.sql │ ├── q16.sql │ ├── q17.sql │ ├── q18.sql │ ├── q19.sql │ ├── q2.sql │ ├── q20.sql │ ├── q21.sql │ ├── q22.sql │ ├── q23a.sql │ ├── q23b.sql │ ├── q24.sql │ ├── q25.sql │ ├── q26.sql │ ├── q27.sql │ ├── q28.sql │ ├── q29.sql │ ├── q3.sql │ ├── q30.sql │ ├── q31.sql │ ├── q32.sql │ ├── q33.sql │ ├── q34.sql │ ├── q35.sql │ ├── q36.sql │ ├── q37.sql │ ├── q38.sql │ ├── q39a.sql │ ├── q39b.sql │ ├── q4.sql │ ├── q40.sql │ ├── q41.sql │ ├── q42.sql │ ├── q43.sql │ ├── q44.sql │ ├── q45.sql │ ├── q46.sql │ ├── q47.sql │ ├── q48.sql │ ├── q49.sql │ ├── q5.sql │ ├── q50.sql │ ├── q51.sql │ ├── q52.sql │ ├── q53.sql │ ├── q54.sql │ ├── q55.sql │ ├── q56.sql │ ├── q57.sql │ ├── q58.sql │ ├── q59.sql │ ├── q6.sql │ ├── q60.sql │ ├── q61.sql │ ├── q62.sql │ ├── q63.sql │ ├── q64.sql │ ├── q65.sql │ ├── q66.sql │ ├── q67.sql │ ├── q68.sql │ ├── q69.sql │ ├── q7.sql │ ├── q70.sql │ ├── q71.sql │ ├── q72.sql │ ├── q73.sql │ ├── q74.sql │ ├── q75.sql │ ├── q76.sql │ ├── q77.sql │ ├── q78.sql │ ├── q79.sql │ ├── q8.sql │ ├── q80.sql │ ├── q81.sql │ ├── q82.sql │ ├── q83.sql │ ├── q84.sql │ ├── q85.sql │ ├── q86.sql │ ├── q87.sql │ ├── q88.sql │ ├── q89.sql │ ├── q9.sql │ ├── q90.sql │ ├── q91.sql │ ├── q92.sql │ ├── q93.sql │ ├── q94.sql │ ├── q95.sql │ ├── q96.sql │ ├── q97.sql │ ├── q98.sql │ └── q99.sql ├── flink-tpcds ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── github │ │ │ └── deadwind4 │ │ │ └── benchmark │ │ │ └── trino │ │ │ └── benchmark │ │ │ └── flink │ │ │ ├── Benchmark.java │ │ │ ├── QueryUtil.java │ │ │ └── Runner.java │ └── resources │ │ └── queries 
│ │ ├── q1.sql │ │ ├── q10.sql │ │ ├── q11.sql │ │ ├── q12.sql │ │ ├── q13.sql │ │ ├── q14a.sql │ │ ├── q14b.sql │ │ ├── q15.sql │ │ ├── q16.sql │ │ ├── q17.sql │ │ ├── q18.sql │ │ ├── q19.sql │ │ ├── q2.sql │ │ ├── q20.sql │ │ ├── q21.sql │ │ ├── q22.sql │ │ ├── q23a.sql │ │ ├── q23b.sql │ │ ├── q24.sql │ │ ├── q25.sql │ │ ├── q26.sql │ │ ├── q27.sql │ │ ├── q28.sql │ │ ├── q29.sql │ │ ├── q3.sql │ │ ├── q30.sql │ │ ├── q31.sql │ │ ├── q32.sql │ │ ├── q33.sql │ │ ├── q34.sql │ │ ├── q35.sql │ │ ├── q36.sql │ │ ├── q37.sql │ │ ├── q38.sql │ │ ├── q39a.sql │ │ ├── q39b.sql │ │ ├── q4.sql │ │ ├── q40.sql │ │ ├── q41.sql │ │ ├── q42.sql │ │ ├── q43.sql │ │ ├── q44.sql │ │ ├── q45.sql │ │ ├── q46.sql │ │ ├── q47.sql │ │ ├── q48.sql │ │ ├── q49.sql │ │ ├── q5.sql │ │ ├── q50.sql │ │ ├── q51.sql │ │ ├── q52.sql │ │ ├── q53.sql │ │ ├── q54.sql │ │ ├── q55.sql │ │ ├── q56.sql │ │ ├── q57.sql │ │ ├── q58.sql │ │ ├── q59.sql │ │ ├── q6.sql │ │ ├── q60.sql │ │ ├── q61.sql │ │ ├── q62.sql │ │ ├── q63.sql │ │ ├── q64.sql │ │ ├── q65.sql │ │ ├── q66.sql │ │ ├── q67.sql │ │ ├── q68.sql │ │ ├── q69.sql │ │ ├── q7.sql │ │ ├── q70.sql │ │ ├── q71.sql │ │ ├── q72.sql │ │ ├── q73.sql │ │ ├── q74.sql │ │ ├── q75.sql │ │ ├── q76.sql │ │ ├── q77.sql │ │ ├── q78.sql │ │ ├── q79.sql │ │ ├── q8.sql │ │ ├── q80.sql │ │ ├── q81.sql │ │ ├── q82.sql │ │ ├── q83.sql │ │ ├── q84.sql │ │ ├── q85.sql │ │ ├── q86.sql │ │ ├── q87.sql │ │ ├── q88.sql │ │ ├── q89.sql │ │ ├── q9.sql │ │ ├── q90.sql │ │ ├── q91.sql │ │ ├── q92.sql │ │ ├── q93.sql │ │ ├── q94.sql │ │ ├── q95.sql │ │ ├── q96.sql │ │ ├── q97.sql │ │ ├── q98.sql │ │ └── q99.sql │ └── test │ └── java │ └── com │ └── github │ └── deadwind4 │ └── benchmark │ └── trino │ └── benchmark │ └── flink │ └── QueryUtilTest.java ├── hadoop-utilities ├── pom.xml └── src │ └── main │ └── com │ └── github │ └── a49a │ ├── Demo.java │ └── HdfsBatchRename.java ├── hive-tpcds-setup ├── Makefile ├── ddl-tpcds │ ├── bin │ │ ├── add_constraints.sql │ │ ├── 
analyze.sql │ │ ├── call_center.sql │ │ ├── catalog_page.sql │ │ ├── catalog_returns.sql │ │ ├── catalog_sales.sql │ │ ├── customer.sql │ │ ├── customer_address.sql │ │ ├── customer_demographics.sql │ │ ├── date_dim.sql │ │ ├── household_demographics.sql │ │ ├── income_band.sql │ │ ├── inventory.sql │ │ ├── item.sql │ │ ├── promotion.sql │ │ ├── reason.sql │ │ ├── ship_mode.sql │ │ ├── store.sql │ │ ├── store_returns.sql │ │ ├── store_sales.sql │ │ ├── time_dim.sql │ │ ├── warehouse.sql │ │ ├── web_page.sql │ │ ├── web_returns.sql │ │ ├── web_sales.sql │ │ └── web_site.sql │ └── text │ │ └── alltables.sql ├── patches │ ├── Darwin │ │ ├── macosx.2.patch │ │ └── macosx.patch │ └── all │ │ ├── tpcds-buffered.patch │ │ ├── tpcds-strcpy.patch │ │ └── tpcds_misspelled_header_guard.patch ├── pom.xml ├── src │ └── main │ │ └── java │ │ └── com │ │ └── github │ │ └── deadwind4 │ │ └── benchmark │ │ └── GenTable.java ├── tpcds-build.sh └── tpcds-setup.sh ├── jdbc-common ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── github │ │ └── deadwind4 │ │ └── benchmark │ │ └── jdbc │ │ ├── Benchmark.java │ │ ├── QueryUtils.java │ │ └── Runner.java │ └── resources │ ├── config.properties │ └── queries │ ├── q0.sql │ ├── q1.sql │ ├── q10.sql │ ├── q11.sql │ ├── q12.sql │ ├── q13.sql │ ├── q14a.sql │ ├── q14b.sql │ ├── q15.sql │ ├── q16.sql │ ├── q17.sql │ ├── q18.sql │ ├── q19.sql │ ├── q2.sql │ ├── q20.sql │ ├── q21.sql │ ├── q22.sql │ ├── q23a.sql │ ├── q23b.sql │ ├── q24.sql │ ├── q25.sql │ ├── q26.sql │ ├── q27.sql │ ├── q28.sql │ ├── q29.sql │ ├── q3.sql │ ├── q30.sql │ ├── q31.sql │ ├── q32.sql │ ├── q33.sql │ ├── q34.sql │ ├── q35.sql │ ├── q36.sql │ ├── q37.sql │ ├── q38.sql │ ├── q39a.sql │ ├── q39b.sql │ ├── q4.sql │ ├── q40.sql │ ├── q41.sql │ ├── q42.sql │ ├── q43.sql │ ├── q44.sql │ ├── q45.sql │ ├── q46.sql │ ├── q47.sql │ ├── q48.sql │ ├── q49.sql │ ├── q5.sql │ ├── q50.sql │ ├── q51.sql │ ├── q52.sql │ ├── q53.sql │ ├── q54.sql │ ├── q55.sql │ ├── q56.sql │ 
├── q57.sql │ ├── q58.sql │ ├── q59.sql │ ├── q6.sql │ ├── q60.sql │ ├── q61.sql │ ├── q62.sql │ ├── q63.sql │ ├── q64.sql │ ├── q65.sql │ ├── q66.sql │ ├── q67.sql │ ├── q68.sql │ ├── q69.sql │ ├── q7.sql │ ├── q70.sql │ ├── q71.sql │ ├── q72.sql │ ├── q73.sql │ ├── q74.sql │ ├── q75.sql │ ├── q76.sql │ ├── q77.sql │ ├── q78.sql │ ├── q79.sql │ ├── q8.sql │ ├── q80.sql │ ├── q81.sql │ ├── q82.sql │ ├── q83.sql │ ├── q84.sql │ ├── q85.sql │ ├── q86.sql │ ├── q87.sql │ ├── q88.sql │ ├── q89.sql │ ├── q9.sql │ ├── q90.sql │ ├── q91.sql │ ├── q92.sql │ ├── q93.sql │ ├── q94.sql │ ├── q95.sql │ ├── q96.sql │ ├── q97.sql │ ├── q98.sql │ └── q99.sql ├── pom.xml ├── presto-tpcds-241 └── pom.xml ├── presto-tpcds ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── github │ │ └── deadwind4 │ │ └── benchmark │ │ └── presto │ │ ├── Benchmark.java │ │ ├── QueryUtil.java │ │ └── Runner.java │ └── resources │ └── queries │ ├── q1.sql │ ├── q10.sql │ ├── q11.sql │ ├── q12.sql │ ├── q13.sql │ ├── q14a.sql │ ├── q14b.sql │ ├── q15.sql │ ├── q16.sql │ ├── q17.sql │ ├── q18.sql │ ├── q19.sql │ ├── q2.sql │ ├── q20.sql │ ├── q21.sql │ ├── q22.sql │ ├── q23a.sql │ ├── q23b.sql │ ├── q24.sql │ ├── q25.sql │ ├── q26.sql │ ├── q27.sql │ ├── q28.sql │ ├── q29.sql │ ├── q3.sql │ ├── q30.sql │ ├── q31.sql │ ├── q32.sql │ ├── q33.sql │ ├── q34.sql │ ├── q35.sql │ ├── q36.sql │ ├── q37.sql │ ├── q38.sql │ ├── q39a.sql │ ├── q39b.sql │ ├── q4.sql │ ├── q40.sql │ ├── q41.sql │ ├── q42.sql │ ├── q43.sql │ ├── q44.sql │ ├── q45.sql │ ├── q46.sql │ ├── q47.sql │ ├── q48.sql │ ├── q49.sql │ ├── q5.sql │ ├── q50.sql │ ├── q51.sql │ ├── q52.sql │ ├── q53.sql │ ├── q54.sql │ ├── q55.sql │ ├── q56.sql │ ├── q57.sql │ ├── q58.sql │ ├── q59.sql │ ├── q6.sql │ ├── q60.sql │ ├── q61.sql │ ├── q62.sql │ ├── q63.sql │ ├── q64.sql │ ├── q65.sql │ ├── q66.sql │ ├── q67.sql │ ├── q68.sql │ ├── q69.sql │ ├── q7.sql │ ├── q70.sql │ ├── q71.sql │ ├── q72.sql │ ├── q73.sql │ ├── q74.sql │ ├── q75.sql │ ├── 
q76.sql │ ├── q77.sql │ ├── q78.sql │ ├── q79.sql │ ├── q8.sql │ ├── q80.sql │ ├── q81.sql │ ├── q82.sql │ ├── q83.sql │ ├── q84.sql │ ├── q85.sql │ ├── q86.sql │ ├── q87.sql │ ├── q88.sql │ ├── q89.sql │ ├── q9.sql │ ├── q90.sql │ ├── q91.sql │ ├── q92.sql │ ├── q93.sql │ ├── q94.sql │ ├── q95.sql │ ├── q96.sql │ ├── q97.sql │ ├── q98.sql │ └── q99.sql ├── spark-hudi ├── pom.xml └── src │ └── main │ ├── resources │ └── queries │ │ ├── q0.sql │ │ ├── q1.sql │ │ ├── q10.sql │ │ ├── q11.sql │ │ ├── q12.sql │ │ ├── q13.sql │ │ ├── q14a.sql │ │ ├── q14b.sql │ │ ├── q15.sql │ │ ├── q16.sql │ │ ├── q17.sql │ │ ├── q18.sql │ │ ├── q19.sql │ │ ├── q2.sql │ │ ├── q20.sql │ │ ├── q21.sql │ │ ├── q22.sql │ │ ├── q23a.sql │ │ ├── q23b.sql │ │ ├── q24.sql │ │ ├── q25.sql │ │ ├── q26.sql │ │ ├── q27.sql │ │ ├── q28.sql │ │ ├── q29.sql │ │ ├── q3.sql │ │ ├── q30.sql │ │ ├── q31.sql │ │ ├── q32.sql │ │ ├── q33.sql │ │ ├── q34.sql │ │ ├── q35.sql │ │ ├── q36.sql │ │ ├── q37.sql │ │ ├── q38.sql │ │ ├── q39a.sql │ │ ├── q39b.sql │ │ ├── q4.sql │ │ ├── q40.sql │ │ ├── q41.sql │ │ ├── q42.sql │ │ ├── q43.sql │ │ ├── q44.sql │ │ ├── q45.sql │ │ ├── q46.sql │ │ ├── q47.sql │ │ ├── q48.sql │ │ ├── q49.sql │ │ ├── q5.sql │ │ ├── q50.sql │ │ ├── q51.sql │ │ ├── q52.sql │ │ ├── q53.sql │ │ ├── q54.sql │ │ ├── q55.sql │ │ ├── q56.sql │ │ ├── q57.sql │ │ ├── q58.sql │ │ ├── q59.sql │ │ ├── q6.sql │ │ ├── q60.sql │ │ ├── q61.sql │ │ ├── q62.sql │ │ ├── q63.sql │ │ ├── q64.sql │ │ ├── q65.sql │ │ ├── q66.sql │ │ ├── q67.sql │ │ ├── q68.sql │ │ ├── q69.sql │ │ ├── q7.sql │ │ ├── q70.sql │ │ ├── q71.sql │ │ ├── q72.sql │ │ ├── q73.sql │ │ ├── q74.sql │ │ ├── q75.sql │ │ ├── q76.sql │ │ ├── q77.sql │ │ ├── q78.sql │ │ ├── q79.sql │ │ ├── q8.sql │ │ ├── q80.sql │ │ ├── q81.sql │ │ ├── q82.sql │ │ ├── q83.sql │ │ ├── q84.sql │ │ ├── q85.sql │ │ ├── q86.sql │ │ ├── q87.sql │ │ ├── q88.sql │ │ ├── q89.sql │ │ ├── q9.sql │ │ ├── q90.sql │ │ ├── q91.sql │ │ ├── q92.sql │ │ ├── q93.sql │ │ ├── q94.sql │ │ 
├── q95.sql │ │ ├── q96.sql │ │ ├── q97.sql │ │ ├── q98.sql │ │ └── q99.sql │ └── scala │ └── spark3 │ └── sql │ ├── CliOptions.java │ └── SqlWrapper.scala ├── spark-iceberg └── pom.xml └── trino-tpcds ├── pom.xml └── src └── main ├── java └── com │ └── github │ └── deadwind4 │ └── benchmark │ └── trino │ ├── Benchmark.java │ ├── QueryUtil.java │ └── Runner.java └── resources └── queries ├── q1.sql ├── q10.sql ├── q11.sql ├── q12.sql ├── q13.sql ├── q14a.sql ├── q14b.sql ├── q15.sql ├── q16.sql ├── q17.sql ├── q18.sql ├── q19.sql ├── q2.sql ├── q20.sql ├── q21.sql ├── q22.sql ├── q23a.sql ├── q23b.sql ├── q24.sql ├── q25.sql ├── q26.sql ├── q27.sql ├── q28.sql ├── q29.sql ├── q3.sql ├── q30.sql ├── q31.sql ├── q32.sql ├── q33.sql ├── q34.sql ├── q35.sql ├── q36.sql ├── q37.sql ├── q38.sql ├── q39a.sql ├── q39b.sql ├── q4.sql ├── q40.sql ├── q41.sql ├── q42.sql ├── q43.sql ├── q44.sql ├── q45.sql ├── q46.sql ├── q47.sql ├── q48.sql ├── q49.sql ├── q5.sql ├── q50.sql ├── q51.sql ├── q52.sql ├── q53.sql ├── q54.sql ├── q55.sql ├── q56.sql ├── q57.sql ├── q58.sql ├── q59.sql ├── q6.sql ├── q60.sql ├── q61.sql ├── q62.sql ├── q63.sql ├── q64.sql ├── q65.sql ├── q66.sql ├── q67.sql ├── q68.sql ├── q69.sql ├── q7.sql ├── q70.sql ├── q71.sql ├── q72.sql ├── q73.sql ├── q74.sql ├── q75.sql ├── q76.sql ├── q77.sql ├── q78.sql ├── q79.sql ├── q8.sql ├── q80.sql ├── q81.sql ├── q82.sql ├── q83.sql ├── q84.sql ├── q85.sql ├── q86.sql ├── q87.sql ├── q88.sql ├── q89.sql ├── q9.sql ├── q90.sql ├── q91.sql ├── q92.sql ├── q93.sql ├── q94.sql ├── q95.sql ├── q96.sql ├── q97.sql ├── q98.sql └── q99.sql /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine 
crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | 25 | .cache 26 | scalastyle-output.xml 27 | .classpath 28 | .idea 29 | .metadata 30 | .settings 31 | .project 32 | .version.properties 33 | filter.properties 34 | logs.zip 35 | target 36 | tmp 37 | *.iml 38 | *.swp 39 | *.pyc 40 | .DS_Store 41 | atlassian-ide-plugin.xml 42 | out/ 43 | *.ipr 44 | *.iws 45 | load_*_*.mk 46 | 47 | spark-hudi/src/main/resources/hive-site.xml 48 | spark-hudi/src/main/resources/hdfs-site.xml 49 | spark-hudi/src/main/resources/core-site.xml 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Big Data SQL Benchmark 2 | This tool tests big data components by running TPC-DS SQL queries. 3 | 4 | # Project Structure 5 | 6 | ### hive-tpcds-setup 7 | TPC-DS data generator. 8 | If you want to test Hudi or Iceberg, you can use the official tools to convert this data to Hudi or Iceberg. 9 | Hudi tool: https://hudi.apache.org/docs/migration_guide 10 | Iceberg tool: https://iceberg.apache.org/docs/latest/spark-procedures/#migrate 11 | 12 | ### jdbc-common 13 | It tests TPC-DS via JDBC. You can run [Apache Kyuubi][kyuubi-official-site] to expose a JDBC service for Spark or Flink. 14 | 15 | ### spark-hudi 16 | It tests TPC-DS SQL via a Spark SQL jar job. 
17 | 18 | [kyuubi-official-site]: https://kyuubi.apache.org/ 19 | 20 | ### Acknowledgements 21 | 22 | Inspired by https://github.com/ververica/flink-sql-benchmark -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q1.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query1.tpl and seed 2031708268 2 | with customer_total_return as 3 | (select sr_customer_sk as ctr_customer_sk 4 | ,sr_store_sk as ctr_store_sk 5 | ,sum(sr_fee) as ctr_total_return 6 | from store_returns 7 | ,date_dim 8 | where sr_returned_date_sk = d_date_sk 9 | and d_year =2000 10 | group by sr_customer_sk 11 | ,sr_store_sk) 12 | select c_customer_id 13 | from customer_total_return ctr1 14 | ,store 15 | ,customer 16 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 17 | from customer_total_return ctr2 18 | where ctr1.ctr_store_sk = ctr2.ctr_store_sk) 19 | and s_store_sk = ctr1.ctr_store_sk 20 | and s_state = 'NM' 21 | and ctr1.ctr_customer_sk = c_customer_sk 22 | order by c_customer_id 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query1.tpl 26 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q12.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query12.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,i_item_id 7 | ,sum(ws_ext_sales_price) as itemrevenue 8 | ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from 11 | web_sales 12 | ,item 13 | ,date_dim 14 | where 15 | ws_item_sk = i_item_sk 16 | and i_category in ('Jewelry', 'Sports', 'Books') 17 | and ws_sold_date_sk = d_date_sk 18 | and d_date between cast('2001-01-12' as date) 19 | 
and (cast('2001-01-12' as date) + interval '30' day) 20 | group by 21 | i_item_id 22 | ,i_item_desc 23 | ,i_category 24 | ,i_class 25 | ,i_current_price 26 | order by 27 | i_category 28 | ,i_class 29 | ,i_item_id 30 | ,i_item_desc 31 | ,revenueratio 32 | limit 100 33 | 34 | -- end query 1 in stream 0 using template query12.tpl 35 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q15.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query15.tpl and seed 1819994127 2 | select ca_zip 3 | ,sum(cs_sales_price) 4 | from catalog_sales 5 | ,customer 6 | ,customer_address 7 | ,date_dim 8 | where cs_bill_customer_sk = c_customer_sk 9 | and c_current_addr_sk = ca_address_sk 10 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 11 | '85392', '85460', '80348', '81792') 12 | or ca_state in ('CA','WA','GA') 13 | or cs_sales_price > 500) 14 | and cs_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip 17 | order by ca_zip 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query15.tpl 21 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q19.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query19.tpl and seed 1930872976 2 | select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item,customer,customer_address,store 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=7 8 | and d_moy=11 9 | and d_year=1999 10 | and ss_customer_sk = c_customer_sk 11 | and c_current_addr_sk = ca_address_sk 12 | and substr(ca_zip,1,5) <> substr(s_zip,1,5) 13 | and ss_store_sk = s_store_sk 14 | 
group by i_brand 15 | ,i_brand_id 16 | ,i_manufact_id 17 | ,i_manufact 18 | order by ext_price desc 19 | ,i_brand 20 | ,i_brand_id 21 | ,i_manufact_id 22 | ,i_manufact 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query19.tpl 26 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q20.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query20.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(cs_ext_sales_price) as itemrevenue 7 | ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from catalog_sales 10 | ,item 11 | ,date_dim 12 | where cs_item_sk = i_item_sk 13 | and i_category in ('Jewelry', 'Sports', 'Books') 14 | and cs_sold_date_sk = d_date_sk 15 | and d_date between cast('2001-01-12' as date) 16 | and (cast('2001-01-12' as date) + interval '30' day) 17 | group by i_item_id 18 | ,i_item_desc 19 | ,i_category 20 | ,i_class 21 | ,i_current_price 22 | order by i_category 23 | ,i_class 24 | ,i_item_id 25 | ,i_item_desc 26 | ,revenueratio 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query20.tpl 30 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q22.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query22.tpl and seed 1819994127 2 | select i_product_name 3 | ,i_brand 4 | ,i_class 5 | ,i_category 6 | ,avg(inv_quantity_on_hand) qoh 7 | from inventory 8 | ,date_dim 9 | ,item 10 | ,warehouse 11 | where inv_date_sk=d_date_sk 12 | and inv_item_sk=i_item_sk 13 | and inv_warehouse_sk = w_warehouse_sk 14 | and d_month_seq between 1212 and 1212 + 11 15 | group by rollup(i_product_name 16 | ,i_brand 17 | 
,i_class 18 | ,i_category) 19 | order by qoh, i_product_name, i_brand, i_class, i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query22.tpl 23 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q26.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query26.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(cs_quantity) agg1, 4 | avg(cs_list_price) agg2, 5 | avg(cs_coupon_amt) agg3, 6 | avg(cs_sales_price) agg4 7 | from catalog_sales, customer_demographics, date_dim, item, promotion 8 | where cs_sold_date_sk = d_date_sk and 9 | cs_item_sk = i_item_sk and 10 | cs_bill_cdemo_sk = cd_demo_sk and 11 | cs_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query26.tpl 22 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q27.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query27.tpl and seed 2017787633 2 | select i_item_id, 3 | s_state, grouping(s_state) g_state, 4 | avg(ss_quantity) agg1, 5 | avg(ss_list_price) agg2, 6 | avg(ss_coupon_amt) agg3, 7 | avg(ss_sales_price) agg4 8 | from store_sales, customer_demographics, date_dim, store, item 9 | where ss_sold_date_sk = d_date_sk and 10 | ss_item_sk = i_item_sk and 11 | ss_store_sk = s_store_sk and 12 | ss_cdemo_sk = cd_demo_sk and 13 | cd_gender = 'M' and 14 | cd_marital_status = 'U' and 15 | cd_education_status = '2 yr Degree' and 16 | d_year = 2001 and 17 | s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') 18 
| group by rollup (i_item_id, s_state) 19 | order by i_item_id 20 | ,s_state 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query27.tpl 24 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q3.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query3.tpl and seed 2031708268 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) sum_agg 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manufact_id = 436 12 | and dt.d_moy=12 13 | group by dt.d_year 14 | ,item.i_brand 15 | ,item.i_brand_id 16 | order by dt.d_year 17 | ,sum_agg desc 18 | ,brand_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query3.tpl 22 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q32.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query32.tpl and seed 2031708268 2 | select sum(cs_ext_discount_amt) as `excess discount amount` 3 | from 4 | catalog_sales 5 | ,item 6 | ,date_dim 7 | where 8 | i_manufact_id = 269 9 | and i_item_sk = cs_item_sk 10 | and d_date between '1998-03-18' and 11 | (cast('1998-03-18' as date) + interval '90' day) 12 | and d_date_sk = cs_sold_date_sk 13 | and cs_ext_discount_amt 14 | > ( 15 | select 16 | 1.3 * avg(cs_ext_discount_amt) 17 | from 18 | catalog_sales 19 | ,date_dim 20 | where 21 | cs_item_sk = i_item_sk 22 | and d_date between '1998-03-18' and 23 | (cast('1998-03-18' as date) + interval '90' day) 24 | and d_date_sk = cs_sold_date_sk 25 | ) 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query32.tpl 29 | 
-------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q36.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query36.tpl and seed 1544728811 2 | select 3 | sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent 11 | from 12 | store_sales 13 | ,date_dim d1 14 | ,item 15 | ,store 16 | where 17 | d1.d_year = 1999 18 | and d1.d_date_sk = ss_sold_date_sk 19 | and i_item_sk = ss_item_sk 20 | and s_store_sk = ss_store_sk 21 | and s_state in ('SD','FL','MI','LA', 22 | 'MO','SC','AL','GA') 23 | group by rollup(i_category,i_class) 24 | order by 25 | lochierarchy desc 26 | ,case when lochierarchy = 0 then i_category end 27 | ,rank_within_parent 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query36.tpl 31 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q37.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query37.tpl and seed 301843662 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, catalog_sales 6 | where i_current_price between 22 and 22 + 30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + interval '60' day) 10 | and i_manufact_id in (678,964,918,849) 11 | and inv_quantity_on_hand between 100 and 500 12 | and cs_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | 
order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query37.tpl 18 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q38.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query38.tpl and seed 1819994127 2 | select count(*) from ( 3 | select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | intersect 9 | select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212 + 11 14 | intersect 15 | select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212 + 11 20 | ) hot_cust 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query38.tpl 24 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q39b.sql: -------------------------------------------------------------------------------- 1 | with inv as 2 | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 3 | ,stdev,mean, case mean when 0 then null else stdev/mean end cov 4 | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 5 | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean 6 | from inventory 7 | ,item 8 | ,warehouse 9 | ,date_dim 10 | where inv_item_sk = i_item_sk 11 | and inv_warehouse_sk = w_warehouse_sk 
12 | and inv_date_sk = d_date_sk 13 | and d_year =1999 14 | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo 15 | where case mean when 0 then 0 else stdev/mean end > 1) 16 | select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov 17 | ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov 18 | from inv inv1,inv inv2 19 | where inv1.i_item_sk = inv2.i_item_sk 20 | and inv1.w_warehouse_sk = inv2.w_warehouse_sk 21 | and inv1.d_moy=4 22 | and inv2.d_moy=4+1 23 | and inv1.cov > 1.5 24 | order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov 25 | ,inv2.d_moy,inv2.mean, inv2.cov 26 | 27 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q40.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query40.tpl and seed 1819994127 2 | select 3 | w_state 4 | ,i_item_id 5 | ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) 6 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before 7 | ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) 8 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after 9 | from 10 | catalog_sales left outer join catalog_returns on 11 | (cs_order_number = cr_order_number 12 | and cs_item_sk = cr_item_sk) 13 | ,warehouse 14 | ,item 15 | ,date_dim 16 | where 17 | i_current_price between 0.99 and 1.49 18 | and i_item_sk = cs_item_sk 19 | and cs_warehouse_sk = w_warehouse_sk 20 | and cs_sold_date_sk = d_date_sk 21 | and d_date between (cast ('1998-04-08' as date) - interval '30' day) 22 | and (cast ('1998-04-08' as date) + interval '30' day) 23 | group by 24 | w_state,i_item_id 25 | order by w_state,i_item_id 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query40.tpl 29 | 
-------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q42.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query42.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_category_id 4 | ,item.i_category 5 | ,sum(ss_ext_sales_price) 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_category_id 16 | ,item.i_category 17 | order by sum(ss_ext_sales_price) desc,dt.d_year 18 | ,item.i_category_id 19 | ,item.i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query42.tpl 23 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q45.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query45.tpl and seed 2031708268 2 | select ca_zip, ca_county, sum(ws_sales_price) 3 | from web_sales, customer, customer_address, date_dim, item 4 | where ws_bill_customer_sk = c_customer_sk 5 | and c_current_addr_sk = ca_address_sk 6 | and ws_item_sk = i_item_sk 7 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') 8 | or 9 | i_item_id in (select i_item_id 10 | from item 11 | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 12 | ) 13 | ) 14 | and ws_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip, ca_county 17 | order by ca_zip, ca_county 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query45.tpl 21 | -------------------------------------------------------------------------------- 
/flink-kyuubi-tpcds/src/main/resources/queries/q52.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query52.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) ext_price 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_brand 16 | ,item.i_brand_id 17 | order by dt.d_year 18 | ,ext_price desc 19 | ,brand_id 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query52.tpl 23 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q55.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query55.tpl and seed 2031708268 2 | select i_brand_id brand_id, i_brand brand, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=36 8 | and d_moy=12 9 | and d_year=2001 10 | group by i_brand, i_brand_id 11 | order by ext_price desc, i_brand_id 12 | limit 100 13 | 14 | -- end query 1 in stream 0 using template query55.tpl 15 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q6.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query6.tpl and seed 1819994127 2 | select a.ca_state state, count(*) cnt 3 | from customer_address a 4 | ,customer c 5 | ,store_sales s 6 | ,date_dim d 7 | ,item i 8 | where a.ca_address_sk = c.c_current_addr_sk 9 | and c.c_customer_sk = s.ss_customer_sk 10 | 
and s.ss_sold_date_sk = d.d_date_sk 11 | and s.ss_item_sk = i.i_item_sk 12 | and d.d_month_seq = 13 | (select distinct (d_month_seq) 14 | from date_dim 15 | where d_year = 2000 16 | and d_moy = 2 ) 17 | and i.i_current_price > 1.2 * 18 | (select avg(j.i_current_price) 19 | from item j 20 | where j.i_category = i.i_category) 21 | group by a.ca_state 22 | having count(*) >= 10 23 | order by cnt 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query6.tpl 27 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q65.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query65.tpl and seed 1819994127 /* Q65: lists store/item pairs whose summed ss_sales_price over d_month_seq 1212..1212+11 (sc.revenue) is at most 0.1x that store's average per-item revenue (sb.ave); first 100 rows by store name, item description. */ 2 | select 3 | s_store_name, 4 | i_item_desc, 5 | sc.revenue, 6 | i_current_price, 7 | i_wholesale_cost, 8 | i_brand 9 | from store, item, 10 | (select ss_store_sk, avg(revenue) as ave 11 | from 12 | (select ss_store_sk, ss_item_sk, 13 | sum(ss_sales_price) as revenue 14 | from store_sales, date_dim 15 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 16 | group by ss_store_sk, ss_item_sk) sa 17 | group by ss_store_sk) sb, 18 | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue 19 | from store_sales, date_dim 20 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 21 | group by ss_store_sk, ss_item_sk) sc 22 | where sb.ss_store_sk = sc.ss_store_sk and 23 | sc.revenue <= 0.1 * sb.ave and 24 | s_store_sk = sc.ss_store_sk and 25 | i_item_sk = sc.ss_item_sk 26 | order by s_store_name, i_item_desc 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query65.tpl 30 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q7.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template 
query7.tpl and seed 1930872976 /* Q7: per i_item_id averages of ss_quantity, ss_list_price, ss_coupon_amt and ss_sales_price for d_year 1998 store sales to the cd_gender 'F', cd_marital_status 'W', cd_education_status 'Primary' demographic, where the promotion has p_channel_email = 'N' or p_channel_event = 'N'. */ 2 | select i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, item, promotion 8 | where ss_sold_date_sk = d_date_sk and 9 | ss_item_sk = i_item_sk and 10 | ss_cdemo_sk = cd_demo_sk and 11 | ss_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query7.tpl 22 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q79.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query79.tpl and seed 2031708268 /* Q79: subquery ms sums ss_coupon_amt and ss_net_profit per ticket, customer, address and store city for d_dow = 1 sales in d_year 1998..1998+2 at stores with 200..295 employees to households with hd_dep_count = 8 or hd_vehicle_count > 0; the outer query joins customer to output names. */ 2 | select 3 | c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit 4 | from 5 | (select ss_ticket_number 6 | ,ss_customer_sk 7 | ,store.s_city 8 | ,sum(ss_coupon_amt) amt 9 | ,sum(ss_net_profit) profit 10 | from store_sales,date_dim,store,household_demographics 11 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 12 | and store_sales.ss_store_sk = store.s_store_sk 13 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 14 | and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) 15 | and date_dim.d_dow = 1 16 | and date_dim.d_year in (1998,1998+1,1998+2) 17 | and store.s_number_employees between 200 and 295 18 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer 19 | where ss_customer_sk = c_customer_sk 20 | order by c_last_name,c_first_name,substr(s_city,1,30), profit 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query79.tpl 24 | 
-------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q82.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query82.tpl and seed 55585014 /* Q82: distinct (via GROUP BY) item id, description and price for items priced 30..30+30 from four listed i_manufact_id values that have 100..500 units on hand on a date within 60 days after 2002-05-30 and that appear in store_sales. */ 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, store_sales 6 | where i_current_price between 30 and 30+30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + interval '60' day) 10 | and i_manufact_id in (437,129,727,663) 11 | and inv_quantity_on_hand between 100 and 500 12 | and ss_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query82.tpl 18 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q84.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query84.tpl and seed 1819994127 /* Q84: customer id and "last, first" name for customers in ca_city 'Hopewell' whose household income band lies within 32287..32287+50000, joined to store_returns through sr_cdemo_sk; first 100 rows by c_customer_id. */ 2 | select c_customer_id as customer_id 3 | ,c_last_name || ', ' || c_first_name as customername 4 | from customer 5 | ,customer_address 6 | ,customer_demographics 7 | ,household_demographics 8 | ,income_band 9 | ,store_returns 10 | where ca_city = 'Hopewell' 11 | and c_current_addr_sk = ca_address_sk 12 | and ib_lower_bound >= 32287 13 | and ib_upper_bound <= 32287 + 50000 14 | and ib_income_band_sk = hd_income_band_sk 15 | and cd_demo_sk = c_current_cdemo_sk 16 | and hd_demo_sk = c_current_hdemo_sk 17 | and sr_cdemo_sk = cd_demo_sk 18 | order by c_customer_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query84.tpl 22 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q86.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query86.tpl and seed 1819994127 /* Q86: sums ws_net_paid over rollup(i_category, i_class) for d_month_seq 1212..1212+11 and ranks each total (descending) within its rollup level and parent category; lochierarchy is the number of rolled-up columns. */ 2 | select 3 | sum(ws_net_paid) as total_sum 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ws_net_paid) desc) as rank_within_parent 11 | from 12 | web_sales 13 | ,date_dim d1 14 | ,item 15 | where 16 | d1.d_month_seq between 1212 and 1212+11 17 | and d1.d_date_sk = ws_sold_date_sk 18 | and i_item_sk = ws_item_sk 19 | group by rollup(i_category,i_class) 20 | order by 21 | lochierarchy desc, 22 | case when lochierarchy = 0 then i_category end, 23 | rank_within_parent 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query86.tpl 27 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q87.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query87.tpl and seed 1819994127 /* Q87: counts distinct (c_last_name, c_first_name, d_date) store-sales combinations in d_month_seq 1212..1212+11 that remain after EXCEPT-ing the same combinations found in catalog sales and in web sales. No LIMIT clause: the result is a single count row. */ 2 | select count(*) 3 | from ((select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212+11) 8 | except 9 | (select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212+11) 14 | except 15 | (select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = 
customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212+11) 20 | ) cool_cust 21 | 22 | 23 | -- end query 1 in stream 0 using template query87.tpl 24 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q92.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query92.tpl and seed 2031708268 /* Q92: sums ws_ext_discount_amt for web sales of i_manufact_id = 269 items within 90 days after 1998-03-18 whose discount exceeds 1.3x the average discount of the same item in that window (correlated subquery on i_item_sk). */ 2 | select 3 | sum(ws_ext_discount_amt) as `Excess Discount Amount` 4 | from 5 | web_sales 6 | ,item 7 | ,date_dim 8 | where 9 | i_manufact_id = 269 10 | and i_item_sk = ws_item_sk 11 | and d_date between '1998-03-18' and 12 | (cast('1998-03-18' as date) + interval '90' day) 13 | and d_date_sk = ws_sold_date_sk 14 | and ws_ext_discount_amt 15 | > ( 16 | SELECT 17 | 1.3 * avg(ws_ext_discount_amt) 18 | FROM 19 | web_sales 20 | ,date_dim 21 | WHERE 22 | ws_item_sk = i_item_sk 23 | and d_date between '1998-03-18' and 24 | (cast('1998-03-18' as date) + interval '90' day) 25 | and d_date_sk = ws_sold_date_sk 26 | ) 27 | order by sum(ws_ext_discount_amt) 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query92.tpl 31 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q93.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query93.tpl and seed 1200409435 /* Q93: per ss_customer_sk sum of act_sales, i.e. (ss_quantity - sr_return_quantity) x ss_sales_price, over store sales whose return reason is 'Did not like the warranty'. The sr_reason_sk = r_reason_sk predicate discards the unmatched rows produced by the left outer join. */ 2 | select ss_customer_sk 3 | ,sum(act_sales) sumsales 4 | from (select ss_item_sk 5 | ,ss_ticket_number 6 | ,ss_customer_sk 7 | ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price 8 | else (ss_quantity*ss_sales_price) end act_sales 9 | from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk 10 | and sr_ticket_number = ss_ticket_number) 11 | ,reason 12 | where sr_reason_sk = r_reason_sk 13 | and r_reason_desc = 'Did not like the 
warranty') t 14 | group by ss_customer_sk 15 | order by sumsales, ss_customer_sk 16 | limit 100 17 | 18 | -- end query 1 in stream 0 using template query93.tpl 19 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q94.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 /* Q94: counts distinct web orders and sums ws_ext_ship_cost and ws_net_profit for orders shipped within 60 days after 1999-5-01 to ca_state 'TX' via web_company_name 'pri', keeping orders that also have a line from a different warehouse (EXISTS) and no web_returns row (NOT EXISTS). */ 2 | select 3 | count(distinct ws_order_number) as `order count` 4 | ,sum(ws_ext_ship_cost) as `total shipping cost` 5 | ,sum(ws_net_profit) as `total net profit` 6 | from 7 | web_sales ws1 8 | ,date_dim 9 | ,customer_address 10 | ,web_site 11 | where 12 | d_date between '1999-5-01' and 13 | (cast('1999-5-01' as date) + interval '60' day) 14 | and ws1.ws_ship_date_sk = d_date_sk 15 | and ws1.ws_ship_addr_sk = ca_address_sk 16 | and ca_state = 'TX' 17 | and ws1.ws_web_site_sk = web_site_sk 18 | and web_company_name = 'pri' 19 | and exists (select * 20 | from web_sales ws2 21 | where ws1.ws_order_number = ws2.ws_order_number 22 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 23 | and not exists(select * 24 | from web_returns wr1 25 | where ws1.ws_order_number = wr1.wr_order_number) 26 | order by count(distinct ws_order_number) 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query94.tpl 30 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q96.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query96.tpl and seed 1819994127 /* Q96: counts store sales made at t_hour 8 with t_minute >= 30 to households with hd_dep_count = 5 at stores named 'ese'. */ 2 | select count(*) 3 | from store_sales 4 | ,household_demographics 5 | ,time_dim, store 6 | where ss_sold_time_sk = time_dim.t_time_sk 7 | and ss_hdemo_sk = household_demographics.hd_demo_sk 8 | and ss_store_sk = s_store_sk 9 | and time_dim.t_hour = 8 10 | and time_dim.t_minute >= 30 11 | and 
household_demographics.hd_dep_count = 5 12 | and store.s_store_name = 'ese' 13 | order by count(*) 14 | limit 100 15 | 16 | -- end query 1 in stream 0 using template query96.tpl 17 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q97.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query97.tpl and seed 1819994127 /* Q97: full-outer-joins the distinct (customer, item) pairs of store sales (ssci) and catalog sales (csci) for d_month_seq 1212..1212+11 and counts the pairs seen only in store, only in catalog, and in both. */ 2 | with ssci as ( 3 | select ss_customer_sk customer_sk 4 | ,ss_item_sk item_sk 5 | from store_sales,date_dim 6 | where ss_sold_date_sk = d_date_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | group by ss_customer_sk 9 | ,ss_item_sk), 10 | csci as( 11 | select cs_bill_customer_sk customer_sk 12 | ,cs_item_sk item_sk 13 | from catalog_sales,date_dim 14 | where cs_sold_date_sk = d_date_sk 15 | and d_month_seq between 1212 and 1212 + 11 16 | group by cs_bill_customer_sk 17 | ,cs_item_sk) 18 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 19 | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 20 | ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 21 | from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk 22 | and ssci.item_sk = csci.item_sk) 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query97.tpl 26 | -------------------------------------------------------------------------------- /flink-kyuubi-tpcds/src/main/resources/queries/q98.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query98.tpl and seed 345591136 /* Q98: per item, ss_ext_sales_price revenue for i_category Jewelry, Sports or Books within 30 days after 2001-01-12, plus that revenue as a percentage of its i_class total (window sum over the grouped rows). Unlike most sibling queries this one has no LIMIT clause. */ 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(ss_ext_sales_price) as itemrevenue 7 | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over 8 
| (partition by i_class) as revenueratio 9 | from 10 | store_sales 11 | ,item 12 | ,date_dim 13 | where 14 | ss_item_sk = i_item_sk 15 | and i_category in ('Jewelry', 'Sports', 'Books') 16 | and ss_sold_date_sk = d_date_sk 17 | and d_date between cast('2001-01-12' as date) 18 | and (cast('2001-01-12' as date) + interval '30' day) 19 | group by 20 | i_item_id 21 | ,i_item_desc 22 | ,i_category 23 | ,i_class 24 | ,i_current_price 25 | order by 26 | i_category 27 | ,i_class 28 | ,i_item_id 29 | ,i_item_desc 30 | ,revenueratio 31 | 32 | -- end query 1 in stream 0 using template query98.tpl 33 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q1.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query1.tpl and seed 2031708268 /* Q1: customer_total_return sums sr_fee per customer and store for d_year 2000; the main query returns customers whose total exceeds 1.2x the average total of the same store, for stores with s_state 'NM'. */ 2 | with customer_total_return as 3 | (select sr_customer_sk as ctr_customer_sk 4 | ,sr_store_sk as ctr_store_sk 5 | ,sum(sr_fee) as ctr_total_return 6 | from store_returns 7 | ,date_dim 8 | where sr_returned_date_sk = d_date_sk 9 | and d_year =2000 10 | group by sr_customer_sk 11 | ,sr_store_sk) 12 | select c_customer_id 13 | from customer_total_return ctr1 14 | ,store 15 | ,customer 16 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 17 | from customer_total_return ctr2 18 | where ctr1.ctr_store_sk = ctr2.ctr_store_sk) 19 | and s_store_sk = ctr1.ctr_store_sk 20 | and s_state = 'NM' 21 | and ctr1.ctr_customer_sk = c_customer_sk 22 | order by c_customer_id 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query1.tpl 26 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q12.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query12.tpl and seed 345591136 /* Q12: per item, ws_ext_sales_price revenue for i_category Jewelry, Sports or Books within 30 days after 2001-01-12, plus that revenue as a percentage of its i_class total (window sum over the grouped rows); first 100 rows. */ 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | 
,i_current_price 6 | ,i_item_id 7 | ,sum(ws_ext_sales_price) as itemrevenue 8 | ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from 11 | web_sales 12 | ,item 13 | ,date_dim 14 | where 15 | ws_item_sk = i_item_sk 16 | and i_category in ('Jewelry', 'Sports', 'Books') 17 | and ws_sold_date_sk = d_date_sk 18 | and d_date between cast('2001-01-12' as date) 19 | and (cast('2001-01-12' as date) + interval '30' day) 20 | group by 21 | i_item_id 22 | ,i_item_desc 23 | ,i_category 24 | ,i_class 25 | ,i_current_price 26 | order by 27 | i_category 28 | ,i_class 29 | ,i_item_id 30 | ,i_item_desc 31 | ,revenueratio 32 | limit 100 33 | 34 | -- end query 1 in stream 0 using template query12.tpl 35 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q15.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query15.tpl and seed 1819994127 /* Q15: sums cs_sales_price per ca_zip for d_qoy 2 of d_year 2000 where the 5-character zip prefix is in the listed set, or ca_state is CA, WA or GA, or cs_sales_price > 500; first 100 rows by ca_zip. */ 2 | select ca_zip 3 | ,sum(cs_sales_price) 4 | from catalog_sales 5 | ,customer 6 | ,customer_address 7 | ,date_dim 8 | where cs_bill_customer_sk = c_customer_sk 9 | and c_current_addr_sk = ca_address_sk 10 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 11 | '85392', '85460', '80348', '81792') 12 | or ca_state in ('CA','WA','GA') 13 | or cs_sales_price > 500) 14 | and cs_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip 17 | order by ca_zip 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query15.tpl 21 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q19.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query19.tpl and seed 1930872976 /* Q19: sums ss_ext_sales_price per brand and manufacturer for i_manager_id = 7 items sold in d_moy 11 of d_year 1999 to customers whose 5-character zip prefix differs from that of the store; first 100 rows by ext_price desc. */ 2 | select i_brand_id brand_id, i_brand brand, i_manufact_id, 
i_manufact, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item,customer,customer_address,store 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=7 8 | and d_moy=11 9 | and d_year=1999 10 | and ss_customer_sk = c_customer_sk 11 | and c_current_addr_sk = ca_address_sk 12 | and substr(ca_zip,1,5) <> substr(s_zip,1,5) 13 | and ss_store_sk = s_store_sk 14 | group by i_brand 15 | ,i_brand_id 16 | ,i_manufact_id 17 | ,i_manufact 18 | order by ext_price desc 19 | ,i_brand 20 | ,i_brand_id 21 | ,i_manufact_id 22 | ,i_manufact 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query19.tpl 26 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q20.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query20.tpl and seed 345591136 /* Q20: per item, cs_ext_sales_price revenue for i_category Jewelry, Sports or Books within 30 days after 2001-01-12, plus that revenue as a percentage of its i_class total (window sum over the grouped rows); first 100 rows. */ 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(cs_ext_sales_price) as itemrevenue 7 | ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from catalog_sales 10 | ,item 11 | ,date_dim 12 | where cs_item_sk = i_item_sk 13 | and i_category in ('Jewelry', 'Sports', 'Books') 14 | and cs_sold_date_sk = d_date_sk 15 | and d_date between cast('2001-01-12' as date) 16 | and (cast('2001-01-12' as date) + interval '30' day) 17 | group by i_item_id 18 | ,i_item_desc 19 | ,i_category 20 | ,i_class 21 | ,i_current_price 22 | order by i_category 23 | ,i_class 24 | ,i_item_id 25 | ,i_item_desc 26 | ,revenueratio 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query20.tpl 30 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q22.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using 
template query22.tpl and seed 1819994127 /* Q22: average inv_quantity_on_hand (qoh) over rollup(i_product_name, i_brand, i_class, i_category) for d_month_seq 1212..1212+11; first 100 rows by qoh and then the rollup columns. */ 2 | select i_product_name 3 | ,i_brand 4 | ,i_class 5 | ,i_category 6 | ,avg(inv_quantity_on_hand) qoh 7 | from inventory 8 | ,date_dim 9 | ,item 10 | ,warehouse 11 | where inv_date_sk=d_date_sk 12 | and inv_item_sk=i_item_sk 13 | and inv_warehouse_sk = w_warehouse_sk 14 | and d_month_seq between 1212 and 1212 + 11 15 | group by rollup(i_product_name 16 | ,i_brand 17 | ,i_class 18 | ,i_category) 19 | order by qoh, i_product_name, i_brand, i_class, i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query22.tpl 23 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q26.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query26.tpl and seed 1930872976 /* Q26: per i_item_id averages of cs_quantity, cs_list_price, cs_coupon_amt and cs_sales_price for d_year 1998 catalog sales to the cd_gender 'F', cd_marital_status 'W', cd_education_status 'Primary' demographic, where the promotion has p_channel_email = 'N' or p_channel_event = 'N'. */ 2 | select i_item_id, 3 | avg(cs_quantity) agg1, 4 | avg(cs_list_price) agg2, 5 | avg(cs_coupon_amt) agg3, 6 | avg(cs_sales_price) agg4 7 | from catalog_sales, customer_demographics, date_dim, item, promotion 8 | where cs_sold_date_sk = d_date_sk and 9 | cs_item_sk = i_item_sk and 10 | cs_bill_cdemo_sk = cd_demo_sk and 11 | cs_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query26.tpl 22 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q27.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query27.tpl and seed 2017787633 /* Q27: averages of ss_quantity, ss_list_price, ss_coupon_amt and ss_sales_price over rollup(i_item_id, s_state) for d_year 2001 store sales to the cd_gender 'M', cd_marital_status 'U', cd_education_status '2 yr Degree' demographic in six listed states; g_state is grouping(s_state), which marks the rolled-up rows. */ 2 | select i_item_id, 3 | s_state, grouping(s_state) g_state, 4 | avg(ss_quantity) agg1, 5 | avg(ss_list_price) agg2, 6 | avg(ss_coupon_amt) agg3, 7 | 
avg(ss_sales_price) agg4 8 | from store_sales, customer_demographics, date_dim, store, item 9 | where ss_sold_date_sk = d_date_sk and 10 | ss_item_sk = i_item_sk and 11 | ss_store_sk = s_store_sk and 12 | ss_cdemo_sk = cd_demo_sk and 13 | cd_gender = 'M' and 14 | cd_marital_status = 'U' and 15 | cd_education_status = '2 yr Degree' and 16 | d_year = 2001 and 17 | s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') 18 | group by rollup (i_item_id, s_state) 19 | order by i_item_id 20 | ,s_state 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query27.tpl 24 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q3.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query3.tpl and seed 2031708268 /* Q3: sums ss_ext_sales_price per d_year and brand for i_manufact_id = 436 items sold in d_moy 12 (no year filter); first 100 rows by year, sum_agg desc, brand_id. */ 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) sum_agg 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manufact_id = 436 12 | and dt.d_moy=12 13 | group by dt.d_year 14 | ,item.i_brand 15 | ,item.i_brand_id 16 | order by dt.d_year 17 | ,sum_agg desc 18 | ,brand_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query3.tpl 22 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q32.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query32.tpl and seed 2031708268 /* Q32: sums cs_ext_discount_amt for catalog sales of i_manufact_id = 269 items within 90 days after 1998-03-18 whose discount exceeds 1.3x the average discount of the same item in that window (correlated subquery on i_item_sk). */ 2 | select sum(cs_ext_discount_amt) as `excess discount amount` 3 | from 4 | catalog_sales 5 | ,item 6 | ,date_dim 7 | where 8 | i_manufact_id = 269 9 | and i_item_sk = cs_item_sk 10 | and d_date between '1998-03-18' and 11 | (cast('1998-03-18' as date) + interval '90' day) 12 | and d_date_sk = cs_sold_date_sk 13 | and 
cs_ext_discount_amt 14 | > ( 15 | select 16 | 1.3 * avg(cs_ext_discount_amt) 17 | from 18 | catalog_sales 19 | ,date_dim 20 | where 21 | cs_item_sk = i_item_sk 22 | and d_date between '1998-03-18' and 23 | (cast('1998-03-18' as date) + interval '90' day) 24 | and d_date_sk = cs_sold_date_sk 25 | ) 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query32.tpl 29 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q36.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query36.tpl and seed 1544728811 /* Q36: gross margin sum(ss_net_profit)/sum(ss_ext_sales_price) over rollup(i_category, i_class) for d_year 1999 store sales in eight listed states, ranked ascending within each rollup level and parent category. */ 2 | select 3 | sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent 11 | from 12 | store_sales 13 | ,date_dim d1 14 | ,item 15 | ,store 16 | where 17 | d1.d_year = 1999 18 | and d1.d_date_sk = ss_sold_date_sk 19 | and i_item_sk = ss_item_sk 20 | and s_store_sk = ss_store_sk 21 | and s_state in ('SD','FL','MI','LA', 22 | 'MO','SC','AL','GA') 23 | group by rollup(i_category,i_class) 24 | order by 25 | lochierarchy desc 26 | ,case when lochierarchy = 0 then i_category end 27 | ,rank_within_parent 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query36.tpl 31 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q37.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query37.tpl and seed 301843662 /* Q37: distinct (via GROUP BY) item id, description and price for items priced 22..22+30 from four listed i_manufact_id values that have 100..500 units on hand on a date within 60 days after 2001-06-02 and that appear in catalog_sales. */ 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, catalog_sales 6 | where i_current_price 
between 22 and 22 + 30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + interval '60' day) 10 | and i_manufact_id in (678,964,918,849) 11 | and inv_quantity_on_hand between 100 and 500 12 | and cs_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query37.tpl 18 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q38.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query38.tpl and seed 1819994127 /* Q38: counts distinct (c_last_name, c_first_name, d_date) combinations that occur in store sales, catalog sales and web sales alike (INTERSECT) for d_month_seq 1212..1212+11. */ 2 | select count(*) from ( 3 | select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | intersect 9 | select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212 + 11 14 | intersect 15 | select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212 + 11 20 | ) hot_cust 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query38.tpl 24 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q39b.sql: -------------------------------------------------------------------------------- 1 | /* Q39b: inv holds, per warehouse, item and d_moy of d_year 1999, the mean and the coefficient of variation (stddev_samp divided by avg) of inv_quantity_on_hand where that ratio exceeds 1; the main query pairs d_moy 4 with d_moy 4+1 for the same warehouse and item, keeping month-4 rows with cov > 1.5. This file has no start/end template markers and no LIMIT. */ with inv as 2 | (select 
w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 3 | ,stdev,mean, case mean when 0 then null else stdev/mean end cov 4 | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 5 | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean 6 | from inventory 7 | ,item 8 | ,warehouse 9 | ,date_dim 10 | where inv_item_sk = i_item_sk 11 | and inv_warehouse_sk = w_warehouse_sk 12 | and inv_date_sk = d_date_sk 13 | and d_year =1999 14 | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo 15 | where case mean when 0 then 0 else stdev/mean end > 1) 16 | select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov 17 | ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov 18 | from inv inv1,inv inv2 19 | where inv1.i_item_sk = inv2.i_item_sk 20 | and inv1.w_warehouse_sk = inv2.w_warehouse_sk 21 | and inv1.d_moy=4 22 | and inv2.d_moy=4+1 23 | and inv1.cov > 1.5 24 | order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov 25 | ,inv2.d_moy,inv2.mean, inv2.cov 26 | 27 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q40.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query40.tpl and seed 1819994127 /* Q40: per w_state and i_item_id, cs_sales_price net of cr_refunded_cash (0 when there is no matching return) summed separately for dates before and from 1998-04-08, over the 30 days either side of that date, for items priced 0.99..1.49. */ 2 | select 3 | w_state 4 | ,i_item_id 5 | ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) 6 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before 7 | ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) 8 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after 9 | from 10 | catalog_sales left outer join catalog_returns on 11 | (cs_order_number = cr_order_number 12 | and cs_item_sk = cr_item_sk) 13 | ,warehouse 14 | ,item 15 | ,date_dim 16 | where 17 | i_current_price between 0.99 and 1.49 18 | and i_item_sk = cs_item_sk 19 | and cs_warehouse_sk = w_warehouse_sk 20 | and 
cs_sold_date_sk = d_date_sk 21 | and d_date between (cast ('1998-04-08' as date) - interval '30' day) 22 | and (cast ('1998-04-08' as date) + interval '30' day) 23 | group by 24 | w_state,i_item_id 25 | order by w_state,i_item_id 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query40.tpl 29 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q42.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query42.tpl and seed 1819994127 /* Q42: sums ss_ext_sales_price per d_year and item category for i_manager_id = 1 items sold in d_moy 12 of d_year 1998; first 100 rows by the sum descending. */ 2 | select dt.d_year 3 | ,item.i_category_id 4 | ,item.i_category 5 | ,sum(ss_ext_sales_price) 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_category_id 16 | ,item.i_category 17 | order by sum(ss_ext_sales_price) desc,dt.d_year 18 | ,item.i_category_id 19 | ,item.i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query42.tpl 23 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q45.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query45.tpl and seed 2031708268 /* Q45: sums ws_sales_price per ca_zip and ca_county for d_qoy 2 of d_year 2000 where the 5-character zip prefix is in the listed set or the i_item_id belongs to one of ten listed i_item_sk values; first 100 rows by zip, county. */ 2 | select ca_zip, ca_county, sum(ws_sales_price) 3 | from web_sales, customer, customer_address, date_dim, item 4 | where ws_bill_customer_sk = c_customer_sk 5 | and c_current_addr_sk = ca_address_sk 6 | and ws_item_sk = i_item_sk 7 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') 8 | or 9 | i_item_id in (select i_item_id 10 | from item 11 | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 12 | ) 13 | ) 14 | and ws_sold_date_sk = d_date_sk 15 | and d_qoy 
= 2 and d_year = 2000 16 | group by ca_zip, ca_county 17 | order by ca_zip, ca_county 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query45.tpl 21 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q52.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query52.tpl and seed 1819994127 /* Q52: sums ss_ext_sales_price per d_year and brand for items with i_manager_id = 1 sold in d_moy 12 of d_year 1998; first 100 rows by year, ext_price desc, brand_id. */ 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) ext_price 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_brand 16 | ,item.i_brand_id 17 | order by dt.d_year 18 | ,ext_price desc 19 | ,brand_id 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query52.tpl 23 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q55.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query55.tpl and seed 2031708268 /* Q55: sums ss_ext_sales_price per brand for items with i_manager_id = 36 sold in d_moy 12 of d_year 2001; first 100 rows by ext_price desc, i_brand_id. */ 2 | select i_brand_id brand_id, i_brand brand, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=36 8 | and d_moy=12 9 | and d_year=2001 10 | group by i_brand, i_brand_id 11 | order by ext_price desc, i_brand_id 12 | limit 100 13 | 14 | -- end query 1 in stream 0 using template query55.tpl 15 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q6.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query6.tpl and seed 
1819994127 2 | select a.ca_state state, count(*) cnt 3 | from customer_address a 4 | ,customer c 5 | ,store_sales s 6 | ,date_dim d 7 | ,item i 8 | where a.ca_address_sk = c.c_current_addr_sk 9 | and c.c_customer_sk = s.ss_customer_sk 10 | and s.ss_sold_date_sk = d.d_date_sk 11 | and s.ss_item_sk = i.i_item_sk 12 | and d.d_month_seq = 13 | (select distinct (d_month_seq) 14 | from date_dim 15 | where d_year = 2000 16 | and d_moy = 2 ) 17 | and i.i_current_price > 1.2 * 18 | (select avg(j.i_current_price) 19 | from item j 20 | where j.i_category = i.i_category) 21 | group by a.ca_state 22 | having count(*) >= 10 23 | order by cnt 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query6.tpl 27 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q65.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query65.tpl and seed 1819994127 2 | select 3 | s_store_name, 4 | i_item_desc, 5 | sc.revenue, 6 | i_current_price, 7 | i_wholesale_cost, 8 | i_brand 9 | from store, item, 10 | (select ss_store_sk, avg(revenue) as ave 11 | from 12 | (select ss_store_sk, ss_item_sk, 13 | sum(ss_sales_price) as revenue 14 | from store_sales, date_dim 15 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 16 | group by ss_store_sk, ss_item_sk) sa 17 | group by ss_store_sk) sb, 18 | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue 19 | from store_sales, date_dim 20 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 21 | group by ss_store_sk, ss_item_sk) sc 22 | where sb.ss_store_sk = sc.ss_store_sk and 23 | sc.revenue <= 0.1 * sb.ave and 24 | s_store_sk = sc.ss_store_sk and 25 | i_item_sk = sc.ss_item_sk 26 | order by s_store_name, i_item_desc 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query65.tpl 30 | 
-------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q7.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query7.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, item, promotion 8 | where ss_sold_date_sk = d_date_sk and 9 | ss_item_sk = i_item_sk and 10 | ss_cdemo_sk = cd_demo_sk and 11 | ss_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query7.tpl 22 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q79.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query79.tpl and seed 2031708268 2 | select 3 | c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit 4 | from 5 | (select ss_ticket_number 6 | ,ss_customer_sk 7 | ,store.s_city 8 | ,sum(ss_coupon_amt) amt 9 | ,sum(ss_net_profit) profit 10 | from store_sales,date_dim,store,household_demographics 11 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 12 | and store_sales.ss_store_sk = store.s_store_sk 13 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 14 | and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) 15 | and date_dim.d_dow = 1 16 | and date_dim.d_year in (1998,1998+1,1998+2) 17 | and store.s_number_employees between 200 and 295 18 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) 
ms,customer 19 | where ss_customer_sk = c_customer_sk 20 | order by c_last_name,c_first_name,substr(s_city,1,30), profit 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query79.tpl 24 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q82.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query82.tpl and seed 55585014 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, store_sales 6 | where i_current_price between 30 and 30+30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + interval '60' day) 10 | and i_manufact_id in (437,129,727,663) 11 | and inv_quantity_on_hand between 100 and 500 12 | and ss_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query82.tpl 18 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q84.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query84.tpl and seed 1819994127 2 | select c_customer_id as customer_id 3 | ,c_last_name || ', ' || c_first_name as customername 4 | from customer 5 | ,customer_address 6 | ,customer_demographics 7 | ,household_demographics 8 | ,income_band 9 | ,store_returns 10 | where ca_city = 'Hopewell' 11 | and c_current_addr_sk = ca_address_sk 12 | and ib_lower_bound >= 32287 13 | and ib_upper_bound <= 32287 + 50000 14 | and ib_income_band_sk = hd_income_band_sk 15 | and cd_demo_sk = c_current_cdemo_sk 16 | and hd_demo_sk = c_current_hdemo_sk 17 | and sr_cdemo_sk = cd_demo_sk 18 | order by c_customer_id 19 | limit 100 20 | 21 | -- end query 1 
in stream 0 using template query84.tpl 22 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q86.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query86.tpl and seed 1819994127 2 | select 3 | sum(ws_net_paid) as total_sum 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ws_net_paid) desc) as rank_within_parent 11 | from 12 | web_sales 13 | ,date_dim d1 14 | ,item 15 | where 16 | d1.d_month_seq between 1212 and 1212+11 17 | and d1.d_date_sk = ws_sold_date_sk 18 | and i_item_sk = ws_item_sk 19 | group by rollup(i_category,i_class) 20 | order by 21 | lochierarchy desc, 22 | case when lochierarchy = 0 then i_category end, 23 | rank_within_parent 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query86.tpl 27 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q87.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query87.tpl and seed 1819994127 2 | select count(*) 3 | from ((select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212+11) 8 | except 9 | (select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212+11) 14 | except 15 | (select distinct c_last_name, 
c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212+11) 20 | ) cool_cust 21 | 22 | 23 | -- end query 1 in stream 0 using template query87.tpl 24 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q92.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query92.tpl and seed 2031708268 2 | select 3 | sum(ws_ext_discount_amt) as `Excess Discount Amount` 4 | from 5 | web_sales 6 | ,item 7 | ,date_dim 8 | where 9 | i_manufact_id = 269 10 | and i_item_sk = ws_item_sk 11 | and d_date between '1998-03-18' and 12 | (cast('1998-03-18' as date) + interval '90' day) 13 | and d_date_sk = ws_sold_date_sk 14 | and ws_ext_discount_amt 15 | > ( 16 | SELECT 17 | 1.3 * avg(ws_ext_discount_amt) 18 | FROM 19 | web_sales 20 | ,date_dim 21 | WHERE 22 | ws_item_sk = i_item_sk 23 | and d_date between '1998-03-18' and 24 | (cast('1998-03-18' as date) + interval '90' day) 25 | and d_date_sk = ws_sold_date_sk 26 | ) 27 | order by sum(ws_ext_discount_amt) 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query92.tpl 31 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q93.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query93.tpl and seed 1200409435 2 | select ss_customer_sk 3 | ,sum(act_sales) sumsales 4 | from (select ss_item_sk 5 | ,ss_ticket_number 6 | ,ss_customer_sk 7 | ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price 8 | else (ss_quantity*ss_sales_price) end act_sales 9 | from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk 
10 | and sr_ticket_number = ss_ticket_number) 11 | ,reason 12 | where sr_reason_sk = r_reason_sk 13 | and r_reason_desc = 'Did not like the warranty') t 14 | group by ss_customer_sk 15 | order by sumsales, ss_customer_sk 16 | limit 100 17 | 18 | -- end query 1 in stream 0 using template query93.tpl 19 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q94.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 2 | select 3 | count(distinct ws_order_number) as `order count` 4 | ,sum(ws_ext_ship_cost) as `total shipping cost` 5 | ,sum(ws_net_profit) as `total net profit` 6 | from 7 | web_sales ws1 8 | ,date_dim 9 | ,customer_address 10 | ,web_site 11 | where 12 | d_date between '1999-5-01' and 13 | (cast('1999-5-01' as date) + interval '60' day) 14 | and ws1.ws_ship_date_sk = d_date_sk 15 | and ws1.ws_ship_addr_sk = ca_address_sk 16 | and ca_state = 'TX' 17 | and ws1.ws_web_site_sk = web_site_sk 18 | and web_company_name = 'pri' 19 | and exists (select * 20 | from web_sales ws2 21 | where ws1.ws_order_number = ws2.ws_order_number 22 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 23 | and not exists(select * 24 | from web_returns wr1 25 | where ws1.ws_order_number = wr1.wr_order_number) 26 | order by count(distinct ws_order_number) 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query94.tpl 30 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q96.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query96.tpl and seed 1819994127 2 | select count(*) 3 | from store_sales 4 | ,household_demographics 5 | ,time_dim, store 6 | where ss_sold_time_sk = time_dim.t_time_sk 7 | and ss_hdemo_sk = 
household_demographics.hd_demo_sk 8 | and ss_store_sk = s_store_sk 9 | and time_dim.t_hour = 8 10 | and time_dim.t_minute >= 30 11 | and household_demographics.hd_dep_count = 5 12 | and store.s_store_name = 'ese' 13 | order by count(*) 14 | limit 100 15 | 16 | -- end query 1 in stream 0 using template query96.tpl 17 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q97.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query97.tpl and seed 1819994127 2 | with ssci as ( 3 | select ss_customer_sk customer_sk 4 | ,ss_item_sk item_sk 5 | from store_sales,date_dim 6 | where ss_sold_date_sk = d_date_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | group by ss_customer_sk 9 | ,ss_item_sk), 10 | csci as( 11 | select cs_bill_customer_sk customer_sk 12 | ,cs_item_sk item_sk 13 | from catalog_sales,date_dim 14 | where cs_sold_date_sk = d_date_sk 15 | and d_month_seq between 1212 and 1212 + 11 16 | group by cs_bill_customer_sk 17 | ,cs_item_sk) 18 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 19 | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 20 | ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 21 | from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk 22 | and ssci.item_sk = csci.item_sk) 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query97.tpl 26 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q98.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query98.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | 
,i_current_price 6 | ,sum(ss_ext_sales_price) as itemrevenue 7 | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from 10 | store_sales 11 | ,item 12 | ,date_dim 13 | where 14 | ss_item_sk = i_item_sk 15 | and i_category in ('Jewelry', 'Sports', 'Books') 16 | and ss_sold_date_sk = d_date_sk 17 | and d_date between cast('2001-01-12' as date) 18 | and (cast('2001-01-12' as date) + interval '30' day) 19 | group by 20 | i_item_id 21 | ,i_item_desc 22 | ,i_category 23 | ,i_class 24 | ,i_current_price 25 | order by 26 | i_category 27 | ,i_class 28 | ,i_item_id 29 | ,i_item_desc 30 | ,revenueratio 31 | 32 | -- end query 1 in stream 0 using template query98.tpl 33 | -------------------------------------------------------------------------------- /hadoop-utilities/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | benchmark-parent 5 | com.github.deadwind4 6 | 1.0-SNAPSHOT 7 | 8 | 9 | 4.0.0 10 | hadoop-utilities 11 | Hadoop Utilities 12 | 13 | 14 | 15 | 16 | org.apache.hadoop 17 | hadoop-client 18 | 2.8.5 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /hadoop-utilities/src/main/com/github/a49a/Demo.java: -------------------------------------------------------------------------------- 1 | package com.github.a49a; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.Path; 6 | 7 | import java.io.IOException; 8 | import java.util.Arrays; 9 | 10 | public class Demo { 11 | public static void main(String[] args) throws IOException { 12 | String path = "hdfs://ns1/tmp/foo_table/"; 13 | Configuration conf = new Configuration(); 14 | conf.addResource(new Path("/Users/ada/opt/hadoop-2.8.5/etc/hadoop/core-site.xml")); 15 | conf.addResource(new Path("/Users/ada/opt/hadoop-2.8.5/etc/hadoop/hdfs-site.xml")); 16 | FileSystem fs = 
FileSystem.get(conf); 17 | Arrays.stream(fs.listStatus(new Path(path))).forEach(fileStatus -> { 18 | try { 19 | fs.rename(new Path(fileStatus.getPath().toString() + "/foo_f"), new Path(fileStatus.getPath().toString() + "/bar_f")); 20 | } catch (IOException e) { 21 | e.printStackTrace(); 22 | } 23 | }); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /hive-tpcds-setup/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: target/lib/dsdgen.jar target/hive-tpcds-setup-0.1-SNAPSHOT.jar 3 | # Quote the glob so that find, not the invoking shell, expands *.java. 4 | target/hive-tpcds-setup-0.1-SNAPSHOT.jar: $(shell find . -name '*.java') 5 | mvn package 6 | 7 | target/tpcds_kit.zip: tpcds_kit.zip 8 | mkdir -p target/ 9 | cp tpcds_kit.zip target/tpcds_kit.zip 10 | # Recipes run under /bin/sh (make's default SHELL), so use POSIX [ ] rather than bash [[ ]]. 11 | tpcds_kit.zip: 12 | curl http://public-repo-1.hortonworks.com/hive-testbench/tpcds/README 13 | [ -e tpcds_kit.zip ] || curl --output tpcds_kit.zip https://public-repo-1.hortonworks.com/hive-testbench/tpcds/TPCDS_Tools.zip 14 | 15 | target/lib/dsdgen.jar: target/tools/dsdgen 16 | cd target/; mkdir -p lib/; ( jar cvf lib/dsdgen.jar tools/ || gjar cvf lib/dsdgen.jar tools/ ) 17 | 18 | target/tools/dsdgen: target/tpcds_kit.zip 19 | test -d target/tools/ || (cd target; unzip tpcds_kit.zip) 20 | test -d target/tools/ || (cd target; mv */tools tools) 21 | cd target/tools; cat ../../patches/all/*.patch | patch -p0 22 | cd target/tools; cat ../../patches/${MYOS}/*.patch | patch -p1 23 | cd target/tools; make clean; make dsdgen 24 | 25 | clean: 26 | mvn clean -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/call_center.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists call_center; 5 | 6 | create table call_center 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.call_center; 9 |
-------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/catalog_page.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_page; 5 | 6 | create table catalog_page 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.catalog_page; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/catalog_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_returns; 5 | 6 | create table catalog_returns 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.catalog_returns; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/catalog_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_sales; 5 | 6 | create table catalog_sales 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.catalog_sales; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/customer.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer; 5 | 6 | create table customer 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/customer_address.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer_address; 5 | 6 | 
create table customer_address 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer_address 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/customer_demographics.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer_demographics; 5 | 6 | create table customer_demographics 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer_demographics; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/date_dim.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists date_dim; 5 | 6 | create table date_dim 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.date_dim; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/household_demographics.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists household_demographics; 5 | 6 | create table household_demographics 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.household_demographics; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/income_band.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists income_band; 5 | 6 | create table income_band 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.income_band; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/inventory.sql: 
-------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists inventory; 5 | 6 | create table inventory 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.inventory 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/item.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists item; 5 | 6 | create table item 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.item 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/promotion.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists promotion; 5 | 6 | create table promotion 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.promotion; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/reason.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists reason; 5 | 6 | create table reason 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.reason; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/ship_mode.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists ship_mode; 5 | 6 | create table ship_mode 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.ship_mode; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/store.sql: 
-------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store; 5 | 6 | create table store 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.store 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/store_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store_returns; 5 | 6 | create table store_returns 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.store_returns 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/store_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store_sales; 5 | 6 | create table store_sales 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.store_sales 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/time_dim.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists time_dim; 5 | 6 | create table time_dim 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.time_dim; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/warehouse.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists warehouse; 5 | 6 | create table warehouse 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.warehouse; 9 | -------------------------------------------------------------------------------- 
/hive-tpcds-setup/ddl-tpcds/bin/web_page.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_page; 5 | 6 | create table web_page 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_page; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/web_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_returns; 5 | 6 | create table web_returns 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_returns 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/web_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_sales; 5 | 6 | create table web_sales 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_sales 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/web_site.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_site; 5 | 6 | create table web_site 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_site; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/patches/all/tpcds-buffered.patch: -------------------------------------------------------------------------------- 1 | diff --git print.c print.c 2 | index 1b64362..5108bd7 100644 3 | --- print.c 4 | +++ print.c 5 | @@ -68,6 +68,7 @@ print_close(int tbl) 6 | fpOutfile = NULL; 7 | if (pTdef->outfile) 8 | { 9 | + fflush(pTdef->outfile); 10 | 
fclose(pTdef->outfile); 11 | pTdef->outfile = NULL; 12 | } 13 | @@ -536,7 +538,7 @@ print_end (int tbl) 14 | if (add_term) 15 | fwrite(term, 1, add_term, fpOutfile); 16 | fprintf (fpOutfile, "\n"); 17 | - fflush(fpOutfile); 18 | + //fflush(fpOutfile); 19 | 20 | return (res); 21 | } 22 | -------------------------------------------------------------------------------- /hive-tpcds-setup/patches/all/tpcds-strcpy.patch: -------------------------------------------------------------------------------- 1 | diff --git r_params.c r_params.c 2 | index 4db16e5..9b1a8e6 100644 3 | --- r_params.c 4 | +++ r_params.c 5 | @@ -46,7 +46,7 @@ 6 | #include "tdefs.h" 7 | #include "release.h" 8 | 9 | -#define PARAM_MAX_LEN 80 10 | +#define PARAM_MAX_LEN PATH_MAX 11 | 12 | #ifndef TEST 13 | extern option_t options[]; 14 | @@ -275,7 +275,7 @@ set_str(char *var, char *val) 15 | nParam = fnd_param(var); 16 | if (nParam >= 0) 17 | { 18 | - strcpy(params[options[nParam].index], val); 19 | + strncpy(params[options[nParam].index], val, PARAM_MAX_LEN); 20 | options[nParam].flags |= OPT_SET; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /hive-tpcds-setup/patches/all/tpcds_misspelled_header_guard.patch: -------------------------------------------------------------------------------- 1 | --- w_store_sales.h.orig 2014-06-25 10:58:19.000000000 -0700 2 | +++ w_store_sales.h 2014-06-25 10:58:51.000000000 -0700 3 | @@ -34,7 +34,7 @@ 4 | * Gradient Systems 5 | */ 6 | #ifndef W_STORE_SALES_H 7 | -#define W_STORE_SLAES_H 8 | +#define W_STORE_SALES_H 9 | 10 | #include "constants.h" 11 | #include "pricing.h" 12 | -------------------------------------------------------------------------------- /hive-tpcds-setup/tpcds-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Check for all the stuff we need to function. 4 | for f in gcc javac mvn; do 5 | which $f > /dev/null 2>&1 6 | if [ $? 
-ne 0 ]; then 7 | echo "Required program $f is missing. Please install or fix your path and try again." 8 | exit 1 9 | fi 10 | done 11 | 12 | echo "Building TPC-DS Data Generator" 13 | # POSIX test syntax: this script runs under /bin/sh, where bash's [[ ]] and == are not available. 14 | unamestr=`uname` 15 | if [ "$unamestr" = 'Darwin' ]; then 16 | export MYOS="Darwin" 17 | fi 18 | 19 | make 20 | echo "TPC-DS Data Generator built, you can now use tpcds-setup.sh to generate data." -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/config.properties: -------------------------------------------------------------------------------- 1 | url=jdbc:hive2://flink02:10009 2 | #database=tpcds_hudi_3 3 | #database=tpcds_hudi_bloom_3 4 | #database=tpcds_hudi_bloom_2 5 | 6 | database=tpcds_bin_partitioned_parquet_3 7 | #database=tpcds_bin_partitioned_parquet_2 8 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q0.sql: -------------------------------------------------------------------------------- 1 | SELECT ss_item_sk, ss_ticket_number FROM store_sales LIMIT 1; 2 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q1.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query1.tpl and seed 2031708268 2 | with customer_total_return as 3 | (select sr_customer_sk as ctr_customer_sk 4 | ,sr_store_sk as ctr_store_sk 5 | ,sum(sr_fee) as ctr_total_return 6 | from store_returns 7 | ,date_dim 8 | where sr_returned_date_sk = d_date_sk 9 | and d_year =2000 10 | group by sr_customer_sk 11 | ,sr_store_sk) 12 | select c_customer_id 13 | from customer_total_return ctr1 14 | ,store 15 | ,customer 16 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 17 | from customer_total_return ctr2 18 | where ctr1.ctr_store_sk = ctr2.ctr_store_sk) 19 | and s_store_sk = ctr1.ctr_store_sk 20 | and s_state
= 'NM' 21 | and ctr1.ctr_customer_sk = c_customer_sk 22 | order by c_customer_id 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query1.tpl 26 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q12.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query12.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,i_item_id 7 | ,sum(ws_ext_sales_price) as itemrevenue 8 | ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from 11 | web_sales 12 | ,item 13 | ,date_dim 14 | where 15 | ws_item_sk = i_item_sk 16 | and i_category in ('Jewelry', 'Sports', 'Books') 17 | and ws_sold_date_sk = d_date_sk 18 | and d_date between cast('2001-01-12' as date) 19 | and (cast('2001-01-12' as date) + interval '30' day) 20 | group by 21 | i_item_id 22 | ,i_item_desc 23 | ,i_category 24 | ,i_class 25 | ,i_current_price 26 | order by 27 | i_category 28 | ,i_class 29 | ,i_item_id 30 | ,i_item_desc 31 | ,revenueratio 32 | limit 100 33 | 34 | -- end query 1 in stream 0 using template query12.tpl 35 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q15.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query15.tpl and seed 1819994127 2 | select ca_zip 3 | ,sum(cs_sales_price) 4 | from catalog_sales 5 | ,customer 6 | ,customer_address 7 | ,date_dim 8 | where cs_bill_customer_sk = c_customer_sk 9 | and c_current_addr_sk = ca_address_sk 10 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 11 | '85392', '85460', '80348', '81792') 12 | or ca_state in ('CA','WA','GA') 13 | or cs_sales_price > 500) 14 | and cs_sold_date_sk = d_date_sk 15 | and d_qoy = 
2 and d_year = 2000 16 | group by ca_zip 17 | order by ca_zip 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query15.tpl 21 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q19.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query19.tpl and seed 1930872976 2 | select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item,customer,customer_address,store 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=7 8 | and d_moy=11 9 | and d_year=1999 10 | and ss_customer_sk = c_customer_sk 11 | and c_current_addr_sk = ca_address_sk 12 | and substr(ca_zip,1,5) <> substr(s_zip,1,5) 13 | and ss_store_sk = s_store_sk 14 | group by i_brand 15 | ,i_brand_id 16 | ,i_manufact_id 17 | ,i_manufact 18 | order by ext_price desc 19 | ,i_brand 20 | ,i_brand_id 21 | ,i_manufact_id 22 | ,i_manufact 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query19.tpl 26 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q20.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query20.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(cs_ext_sales_price) as itemrevenue 7 | ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from catalog_sales 10 | ,item 11 | ,date_dim 12 | where cs_item_sk = i_item_sk 13 | and i_category in ('Jewelry', 'Sports', 'Books') 14 | and cs_sold_date_sk = d_date_sk 15 | and d_date between cast('2001-01-12' as date) 16 | and (cast('2001-01-12' as date) + interval '30' day) 17 | group by i_item_id 18 | 
,i_item_desc 19 | ,i_category 20 | ,i_class 21 | ,i_current_price 22 | order by i_category 23 | ,i_class 24 | ,i_item_id 25 | ,i_item_desc 26 | ,revenueratio 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query20.tpl 30 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q22.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query22.tpl and seed 1819994127 2 | select i_product_name 3 | ,i_brand 4 | ,i_class 5 | ,i_category 6 | ,avg(inv_quantity_on_hand) qoh 7 | from inventory 8 | ,date_dim 9 | ,item 10 | ,warehouse 11 | where inv_date_sk=d_date_sk 12 | and inv_item_sk=i_item_sk 13 | and inv_warehouse_sk = w_warehouse_sk 14 | and d_month_seq between 1212 and 1212 + 11 15 | group by rollup(i_product_name 16 | ,i_brand 17 | ,i_class 18 | ,i_category) 19 | order by qoh, i_product_name, i_brand, i_class, i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query22.tpl 23 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q26.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query26.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(cs_quantity) agg1, 4 | avg(cs_list_price) agg2, 5 | avg(cs_coupon_amt) agg3, 6 | avg(cs_sales_price) agg4 7 | from catalog_sales, customer_demographics, date_dim, item, promotion 8 | where cs_sold_date_sk = d_date_sk and 9 | cs_item_sk = i_item_sk and 10 | cs_bill_cdemo_sk = cd_demo_sk and 11 | cs_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 
0 using template query26.tpl 22 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q27.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query27.tpl and seed 2017787633 2 | select i_item_id, 3 | s_state, grouping(s_state) g_state, 4 | avg(ss_quantity) agg1, 5 | avg(ss_list_price) agg2, 6 | avg(ss_coupon_amt) agg3, 7 | avg(ss_sales_price) agg4 8 | from store_sales, customer_demographics, date_dim, store, item 9 | where ss_sold_date_sk = d_date_sk and 10 | ss_item_sk = i_item_sk and 11 | ss_store_sk = s_store_sk and 12 | ss_cdemo_sk = cd_demo_sk and 13 | cd_gender = 'M' and 14 | cd_marital_status = 'U' and 15 | cd_education_status = '2 yr Degree' and 16 | d_year = 2001 and 17 | s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') 18 | group by rollup (i_item_id, s_state) 19 | order by i_item_id 20 | ,s_state 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query27.tpl 24 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q3.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query3.tpl and seed 2031708268 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) sum_agg 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manufact_id = 436 12 | and dt.d_moy=12 13 | group by dt.d_year 14 | ,item.i_brand 15 | ,item.i_brand_id 16 | order by dt.d_year 17 | ,sum_agg desc 18 | ,brand_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query3.tpl 22 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q32.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query32.tpl and seed 2031708268 2 | select sum(cs_ext_discount_amt) as `excess discount amount` 3 | from 4 | catalog_sales 5 | ,item 6 | ,date_dim 7 | where 8 | i_manufact_id = 269 9 | and i_item_sk = cs_item_sk 10 | and d_date between '1998-03-18' and 11 | (cast('1998-03-18' as date) + interval '90' day) 12 | and d_date_sk = cs_sold_date_sk 13 | and cs_ext_discount_amt 14 | > ( 15 | select 16 | 1.3 * avg(cs_ext_discount_amt) 17 | from 18 | catalog_sales 19 | ,date_dim 20 | where 21 | cs_item_sk = i_item_sk 22 | and d_date between '1998-03-18' and 23 | (cast('1998-03-18' as date) + interval '90' day) 24 | and d_date_sk = cs_sold_date_sk 25 | ) 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query32.tpl 29 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q36.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query36.tpl and seed 1544728811 2 | select 3 | sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent 11 | from 12 | store_sales 13 | ,date_dim d1 14 | ,item 15 | ,store 16 | where 17 | d1.d_year = 1999 18 | and d1.d_date_sk = ss_sold_date_sk 19 | and i_item_sk = ss_item_sk 20 | and s_store_sk = ss_store_sk 21 | and s_state in ('SD','FL','MI','LA', 22 | 'MO','SC','AL','GA') 23 | group by rollup(i_category,i_class) 24 | order by 25 | lochierarchy desc 26 | ,case when lochierarchy = 0 then i_category end 27 | ,rank_within_parent 28 | limit 100 29 | 30 | -- end 
query 1 in stream 0 using template query36.tpl 31 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q37.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query37.tpl and seed 301843662 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, catalog_sales 6 | where i_current_price between 22 and 22 + 30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + interval '60' day) 10 | and i_manufact_id in (678,964,918,849) 11 | and inv_quantity_on_hand between 100 and 500 12 | and cs_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query37.tpl 18 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q38.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query38.tpl and seed 1819994127 2 | select count(*) from ( 3 | select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | intersect 9 | select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212 + 11 14 | intersect 15 | select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and 
web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212 + 11 20 | ) hot_cust 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query38.tpl 24 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q39b.sql: -------------------------------------------------------------------------------- 1 | with inv as 2 | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 3 | ,stdev,mean, case mean when 0 then null else stdev/mean end cov 4 | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 5 | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean 6 | from inventory 7 | ,item 8 | ,warehouse 9 | ,date_dim 10 | where inv_item_sk = i_item_sk 11 | and inv_warehouse_sk = w_warehouse_sk 12 | and inv_date_sk = d_date_sk 13 | and d_year =1999 14 | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo 15 | where case mean when 0 then 0 else stdev/mean end > 1) 16 | select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov 17 | ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov 18 | from inv inv1,inv inv2 19 | where inv1.i_item_sk = inv2.i_item_sk 20 | and inv1.w_warehouse_sk = inv2.w_warehouse_sk 21 | and inv1.d_moy=4 22 | and inv2.d_moy=4+1 23 | and inv1.cov > 1.5 24 | order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov 25 | ,inv2.d_moy,inv2.mean, inv2.cov 26 | 27 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q40.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query40.tpl and seed 1819994127 2 | select 3 | w_state 4 | ,i_item_id 5 | ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) 6 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before 7 | ,sum(case when 
(cast(d_date as date) >= cast ('1998-04-08' as date)) 8 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after 9 | from 10 | catalog_sales left outer join catalog_returns on 11 | (cs_order_number = cr_order_number 12 | and cs_item_sk = cr_item_sk) 13 | ,warehouse 14 | ,item 15 | ,date_dim 16 | where 17 | i_current_price between 0.99 and 1.49 18 | and i_item_sk = cs_item_sk 19 | and cs_warehouse_sk = w_warehouse_sk 20 | and cs_sold_date_sk = d_date_sk 21 | and d_date between (cast ('1998-04-08' as date) - interval '30' day) 22 | and (cast ('1998-04-08' as date) + interval '30' day) 23 | group by 24 | w_state,i_item_id 25 | order by w_state,i_item_id 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query40.tpl 29 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q42.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query42.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_category_id 4 | ,item.i_category 5 | ,sum(ss_ext_sales_price) 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_category_id 16 | ,item.i_category 17 | order by sum(ss_ext_sales_price) desc,dt.d_year 18 | ,item.i_category_id 19 | ,item.i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query42.tpl 23 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q45.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query45.tpl and seed 2031708268 2 | select ca_zip, ca_county, sum(ws_sales_price) 3 | from web_sales, 
customer, customer_address, date_dim, item 4 | where ws_bill_customer_sk = c_customer_sk 5 | and c_current_addr_sk = ca_address_sk 6 | and ws_item_sk = i_item_sk 7 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') 8 | or 9 | i_item_id in (select i_item_id 10 | from item 11 | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 12 | ) 13 | ) 14 | and ws_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip, ca_county 17 | order by ca_zip, ca_county 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query45.tpl 21 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q52.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query52.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) ext_price 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_brand 16 | ,item.i_brand_id 17 | order by dt.d_year 18 | ,ext_price desc 19 | ,brand_id 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query52.tpl 23 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q55.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query55.tpl and seed 2031708268 2 | select i_brand_id brand_id, i_brand brand, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=36 8 | and d_moy=12 9 | and 
d_year=2001 10 | group by i_brand, i_brand_id 11 | order by ext_price desc, i_brand_id 12 | limit 100 13 | 14 | -- end query 1 in stream 0 using template query55.tpl 15 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q6.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query6.tpl and seed 1819994127 2 | select a.ca_state state, count(*) cnt 3 | from customer_address a 4 | ,customer c 5 | ,store_sales s 6 | ,date_dim d 7 | ,item i 8 | where a.ca_address_sk = c.c_current_addr_sk 9 | and c.c_customer_sk = s.ss_customer_sk 10 | and s.ss_sold_date_sk = d.d_date_sk 11 | and s.ss_item_sk = i.i_item_sk 12 | and d.d_month_seq = 13 | (select distinct (d_month_seq) 14 | from date_dim 15 | where d_year = 2000 16 | and d_moy = 2 ) 17 | and i.i_current_price > 1.2 * 18 | (select avg(j.i_current_price) 19 | from item j 20 | where j.i_category = i.i_category) 21 | group by a.ca_state 22 | having count(*) >= 10 23 | order by cnt 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query6.tpl 27 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q65.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query65.tpl and seed 1819994127 2 | select 3 | s_store_name, 4 | i_item_desc, 5 | sc.revenue, 6 | i_current_price, 7 | i_wholesale_cost, 8 | i_brand 9 | from store, item, 10 | (select ss_store_sk, avg(revenue) as ave 11 | from 12 | (select ss_store_sk, ss_item_sk, 13 | sum(ss_sales_price) as revenue 14 | from store_sales, date_dim 15 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 16 | group by ss_store_sk, ss_item_sk) sa 17 | group by ss_store_sk) sb, 18 | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue 19 | from 
store_sales, date_dim 20 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 21 | group by ss_store_sk, ss_item_sk) sc 22 | where sb.ss_store_sk = sc.ss_store_sk and 23 | sc.revenue <= 0.1 * sb.ave and 24 | s_store_sk = sc.ss_store_sk and 25 | i_item_sk = sc.ss_item_sk 26 | order by s_store_name, i_item_desc 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query65.tpl 30 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q7.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query7.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, item, promotion 8 | where ss_sold_date_sk = d_date_sk and 9 | ss_item_sk = i_item_sk and 10 | ss_cdemo_sk = cd_demo_sk and 11 | ss_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query7.tpl 22 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q79.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query79.tpl and seed 2031708268 2 | select 3 | c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit 4 | from 5 | (select ss_ticket_number 6 | ,ss_customer_sk 7 | ,store.s_city 8 | ,sum(ss_coupon_amt) amt 9 | ,sum(ss_net_profit) profit 10 | from store_sales,date_dim,store,household_demographics 11 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 12 
| and store_sales.ss_store_sk = store.s_store_sk 13 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 14 | and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) 15 | and date_dim.d_dow = 1 16 | and date_dim.d_year in (1998,1998+1,1998+2) 17 | and store.s_number_employees between 200 and 295 18 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer 19 | where ss_customer_sk = c_customer_sk 20 | order by c_last_name,c_first_name,substr(s_city,1,30), profit 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query79.tpl 24 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q82.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query82.tpl and seed 55585014 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, store_sales 6 | where i_current_price between 30 and 30+30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + interval '60' day) 10 | and i_manufact_id in (437,129,727,663) 11 | and inv_quantity_on_hand between 100 and 500 12 | and ss_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query82.tpl 18 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q84.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query84.tpl and seed 1819994127 2 | select c_customer_id as customer_id 3 | ,c_last_name || ', ' || c_first_name as customername 4 | from customer 5 | ,customer_address 6 | ,customer_demographics 7 | ,household_demographics 
8 | ,income_band 9 | ,store_returns 10 | where ca_city = 'Hopewell' 11 | and c_current_addr_sk = ca_address_sk 12 | and ib_lower_bound >= 32287 13 | and ib_upper_bound <= 32287 + 50000 14 | and ib_income_band_sk = hd_income_band_sk 15 | and cd_demo_sk = c_current_cdemo_sk 16 | and hd_demo_sk = c_current_hdemo_sk 17 | and sr_cdemo_sk = cd_demo_sk 18 | order by c_customer_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query84.tpl 22 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q86.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query86.tpl and seed 1819994127 2 | select 3 | sum(ws_net_paid) as total_sum 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ws_net_paid) desc) as rank_within_parent 11 | from 12 | web_sales 13 | ,date_dim d1 14 | ,item 15 | where 16 | d1.d_month_seq between 1212 and 1212+11 17 | and d1.d_date_sk = ws_sold_date_sk 18 | and i_item_sk = ws_item_sk 19 | group by rollup(i_category,i_class) 20 | order by 21 | lochierarchy desc, 22 | case when lochierarchy = 0 then i_category end, 23 | rank_within_parent 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query86.tpl 27 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q87.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query87.tpl and seed 1819994127 2 | select count(*) 3 | from ((select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and 
store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212+11) 8 | except 9 | (select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212+11) 14 | except 15 | (select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212+11) 20 | ) cool_cust 21 | 22 | 23 | -- end query 1 in stream 0 using template query87.tpl 24 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q92.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query92.tpl and seed 2031708268 2 | select 3 | sum(ws_ext_discount_amt) as `Excess Discount Amount` 4 | from 5 | web_sales 6 | ,item 7 | ,date_dim 8 | where 9 | i_manufact_id = 269 10 | and i_item_sk = ws_item_sk 11 | and d_date between '1998-03-18' and 12 | (cast('1998-03-18' as date) + interval '90' day) 13 | and d_date_sk = ws_sold_date_sk 14 | and ws_ext_discount_amt 15 | > ( 16 | SELECT 17 | 1.3 * avg(ws_ext_discount_amt) 18 | FROM 19 | web_sales 20 | ,date_dim 21 | WHERE 22 | ws_item_sk = i_item_sk 23 | and d_date between '1998-03-18' and 24 | (cast('1998-03-18' as date) + interval '90' day) 25 | and d_date_sk = ws_sold_date_sk 26 | ) 27 | order by sum(ws_ext_discount_amt) 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query92.tpl 31 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q93.sql: -------------------------------------------------------------------------------- 1 | 
-- start query 1 in stream 0 using template query93.tpl and seed 1200409435 2 | select ss_customer_sk 3 | ,sum(act_sales) sumsales 4 | from (select ss_item_sk 5 | ,ss_ticket_number 6 | ,ss_customer_sk 7 | ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price 8 | else (ss_quantity*ss_sales_price) end act_sales 9 | from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk 10 | and sr_ticket_number = ss_ticket_number) 11 | ,reason 12 | where sr_reason_sk = r_reason_sk 13 | and r_reason_desc = 'Did not like the warranty') t 14 | group by ss_customer_sk 15 | order by sumsales, ss_customer_sk 16 | limit 100 17 | 18 | -- end query 1 in stream 0 using template query93.tpl 19 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q94.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 2 | select 3 | count(distinct ws_order_number) as `order count` 4 | ,sum(ws_ext_ship_cost) as `total shipping cost` 5 | ,sum(ws_net_profit) as `total net profit` 6 | from 7 | web_sales ws1 8 | ,date_dim 9 | ,customer_address 10 | ,web_site 11 | where 12 | d_date between '1999-5-01' and 13 | (cast('1999-5-01' as date) + interval '60' day) 14 | and ws1.ws_ship_date_sk = d_date_sk 15 | and ws1.ws_ship_addr_sk = ca_address_sk 16 | and ca_state = 'TX' 17 | and ws1.ws_web_site_sk = web_site_sk 18 | and web_company_name = 'pri' 19 | and exists (select * 20 | from web_sales ws2 21 | where ws1.ws_order_number = ws2.ws_order_number 22 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 23 | and not exists(select * 24 | from web_returns wr1 25 | where ws1.ws_order_number = wr1.wr_order_number) 26 | order by count(distinct ws_order_number) 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query94.tpl 30 | 
-------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q96.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query96.tpl and seed 1819994127 2 | select count(*) 3 | from store_sales 4 | ,household_demographics 5 | ,time_dim, store 6 | where ss_sold_time_sk = time_dim.t_time_sk 7 | and ss_hdemo_sk = household_demographics.hd_demo_sk 8 | and ss_store_sk = s_store_sk 9 | and time_dim.t_hour = 8 10 | and time_dim.t_minute >= 30 11 | and household_demographics.hd_dep_count = 5 12 | and store.s_store_name = 'ese' 13 | order by count(*) 14 | limit 100 15 | 16 | -- end query 1 in stream 0 using template query96.tpl 17 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q97.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query97.tpl and seed 1819994127 2 | with ssci as ( 3 | select ss_customer_sk customer_sk 4 | ,ss_item_sk item_sk 5 | from store_sales,date_dim 6 | where ss_sold_date_sk = d_date_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | group by ss_customer_sk 9 | ,ss_item_sk), 10 | csci as( 11 | select cs_bill_customer_sk customer_sk 12 | ,cs_item_sk item_sk 13 | from catalog_sales,date_dim 14 | where cs_sold_date_sk = d_date_sk 15 | and d_month_seq between 1212 and 1212 + 11 16 | group by cs_bill_customer_sk 17 | ,cs_item_sk) 18 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 19 | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 20 | ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 21 | from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk 22 | and ssci.item_sk = 
csci.item_sk) 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query97.tpl 26 | -------------------------------------------------------------------------------- /jdbc-common/src/main/resources/queries/q98.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query98.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(ss_ext_sales_price) as itemrevenue 7 | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from 10 | store_sales 11 | ,item 12 | ,date_dim 13 | where 14 | ss_item_sk = i_item_sk 15 | and i_category in ('Jewelry', 'Sports', 'Books') 16 | and ss_sold_date_sk = d_date_sk 17 | and d_date between cast('2001-01-12' as date) 18 | and (cast('2001-01-12' as date) + interval '30' day) 19 | group by 20 | i_item_id 21 | ,i_item_desc 22 | ,i_category 23 | ,i_class 24 | ,i_current_price 25 | order by 26 | i_category 27 | ,i_class 28 | ,i_item_id 29 | ,i_item_desc 30 | ,revenueratio 31 | 32 | -- end query 1 in stream 0 using template query98.tpl 33 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q1.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query1.tpl and seed 2031708268 2 | with customer_total_return as 3 | (select sr_customer_sk as ctr_customer_sk 4 | ,sr_store_sk as ctr_store_sk 5 | ,sum(sr_fee) as ctr_total_return 6 | from store_returns 7 | ,date_dim 8 | where sr_returned_date_sk = d_date_sk 9 | and d_year =2000 10 | group by sr_customer_sk 11 | ,sr_store_sk) 12 | select c_customer_id 13 | from customer_total_return ctr1 14 | ,store 15 | ,customer 16 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 17 | from customer_total_return ctr2 18 | where ctr1.ctr_store_sk = 
ctr2.ctr_store_sk) 19 | and s_store_sk = ctr1.ctr_store_sk 20 | and s_state = 'NM' 21 | and ctr1.ctr_customer_sk = c_customer_sk 22 | order by c_customer_id 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query1.tpl 26 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q12.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query12.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,i_item_id 7 | ,sum(ws_ext_sales_price) as itemrevenue 8 | ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from 11 | web_sales 12 | ,item 13 | ,date_dim 14 | where 15 | ws_item_sk = i_item_sk 16 | and i_category in ('Jewelry', 'Sports', 'Books') 17 | and ws_sold_date_sk = d_date_sk 18 | and d_date between cast('2001-01-12' as date) 19 | and (cast('2001-01-12' as date) + interval '30' day) 20 | group by 21 | i_item_id 22 | ,i_item_desc 23 | ,i_category 24 | ,i_class 25 | ,i_current_price 26 | order by 27 | i_category 28 | ,i_class 29 | ,i_item_id 30 | ,i_item_desc 31 | ,revenueratio 32 | limit 100 33 | 34 | -- end query 1 in stream 0 using template query12.tpl 35 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q15.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query15.tpl and seed 1819994127 2 | select ca_zip 3 | ,sum(cs_sales_price) 4 | from catalog_sales 5 | ,customer 6 | ,customer_address 7 | ,date_dim 8 | where cs_bill_customer_sk = c_customer_sk 9 | and c_current_addr_sk = ca_address_sk 10 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 11 | '85392', '85460', '80348', '81792') 12 | or ca_state in ('CA','WA','GA') 13 | 
or cs_sales_price > 500) 14 | and cs_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip 17 | order by ca_zip 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query15.tpl 21 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q19.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query19.tpl and seed 1930872976 2 | select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item,customer,customer_address,store 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=7 8 | and d_moy=11 9 | and d_year=1999 10 | and ss_customer_sk = c_customer_sk 11 | and c_current_addr_sk = ca_address_sk 12 | and substr(ca_zip,1,5) <> substr(s_zip,1,5) 13 | and ss_store_sk = s_store_sk 14 | group by i_brand 15 | ,i_brand_id 16 | ,i_manufact_id 17 | ,i_manufact 18 | order by ext_price desc 19 | ,i_brand 20 | ,i_brand_id 21 | ,i_manufact_id 22 | ,i_manufact 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query19.tpl 26 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q20.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query20.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(cs_ext_sales_price) as itemrevenue 7 | ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from catalog_sales 10 | ,item 11 | ,date_dim 12 | where cs_item_sk = i_item_sk 13 | and i_category in ('Jewelry', 'Sports', 'Books') 14 | and cs_sold_date_sk = d_date_sk 15 | and d_date between cast('2001-01-12' as date) 16 | 
and (cast('2001-01-12' as date) + interval '30' day) 17 | group by i_item_id 18 | ,i_item_desc 19 | ,i_category 20 | ,i_class 21 | ,i_current_price 22 | order by i_category 23 | ,i_class 24 | ,i_item_id 25 | ,i_item_desc 26 | ,revenueratio 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query20.tpl 30 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q22.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query22.tpl and seed 1819994127 2 | select i_product_name 3 | ,i_brand 4 | ,i_class 5 | ,i_category 6 | ,avg(inv_quantity_on_hand) qoh 7 | from inventory 8 | ,date_dim 9 | ,item 10 | ,warehouse 11 | where inv_date_sk=d_date_sk 12 | and inv_item_sk=i_item_sk 13 | and inv_warehouse_sk = w_warehouse_sk 14 | and d_month_seq between 1212 and 1212 + 11 15 | group by rollup(i_product_name 16 | ,i_brand 17 | ,i_class 18 | ,i_category) 19 | order by qoh, i_product_name, i_brand, i_class, i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query22.tpl 23 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q26.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query26.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(cs_quantity) agg1, 4 | avg(cs_list_price) agg2, 5 | avg(cs_coupon_amt) agg3, 6 | avg(cs_sales_price) agg4 7 | from catalog_sales, customer_demographics, date_dim, item, promotion 8 | where cs_sold_date_sk = d_date_sk and 9 | cs_item_sk = i_item_sk and 10 | cs_bill_cdemo_sk = cd_demo_sk and 11 | cs_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by 
i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query26.tpl 22 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q27.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query27.tpl and seed 2017787633 2 | select i_item_id, 3 | s_state, grouping(s_state) g_state, 4 | avg(ss_quantity) agg1, 5 | avg(ss_list_price) agg2, 6 | avg(ss_coupon_amt) agg3, 7 | avg(ss_sales_price) agg4 8 | from store_sales, customer_demographics, date_dim, store, item 9 | where ss_sold_date_sk = d_date_sk and 10 | ss_item_sk = i_item_sk and 11 | ss_store_sk = s_store_sk and 12 | ss_cdemo_sk = cd_demo_sk and 13 | cd_gender = 'M' and 14 | cd_marital_status = 'U' and 15 | cd_education_status = '2 yr Degree' and 16 | d_year = 2001 and 17 | s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') 18 | group by rollup (i_item_id, s_state) 19 | order by i_item_id 20 | ,s_state 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query27.tpl 24 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q3.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query3.tpl and seed 2031708268 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) sum_agg 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manufact_id = 436 12 | and dt.d_moy=12 13 | group by dt.d_year 14 | ,item.i_brand 15 | ,item.i_brand_id 16 | order by dt.d_year 17 | ,sum_agg desc 18 | ,brand_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query3.tpl 22 | 
-------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q32.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query32.tpl and seed 2031708268 2 | select sum(cs_ext_discount_amt) as "excess discount amount" 3 | from 4 | catalog_sales 5 | ,item 6 | ,date_dim 7 | where 8 | i_manufact_id = 269 9 | and i_item_sk = cs_item_sk 10 | and d_date between timestamp '1998-03-18' and 11 | (cast('1998-03-18' as date) + interval '90' day) 12 | and d_date_sk = cs_sold_date_sk 13 | and cs_ext_discount_amt 14 | > ( 15 | select 16 | 1.3 * avg(cs_ext_discount_amt) 17 | from 18 | catalog_sales 19 | ,date_dim 20 | where 21 | cs_item_sk = i_item_sk 22 | and d_date between timestamp '1998-03-18' and 23 | (cast('1998-03-18' as date) + interval '90' day) 24 | and d_date_sk = cs_sold_date_sk 25 | ) 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query32.tpl 29 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q36.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query36.tpl and seed 1544728811 2 | select 3 | sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent 11 | from 12 | store_sales 13 | ,date_dim d1 14 | ,item 15 | ,store 16 | where 17 | d1.d_year = 1999 18 | and d1.d_date_sk = ss_sold_date_sk 19 | and i_item_sk = ss_item_sk 20 | and s_store_sk = ss_store_sk 21 | and s_state in ('SD','FL','MI','LA', 22 | 'MO','SC','AL','GA') 23 | group by 
rollup(i_category,i_class) 24 | order by 25 | lochierarchy desc 26 | ,case when lochierarchy = 0 then i_category end 27 | ,rank_within_parent 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query36.tpl 31 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q37.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query37.tpl and seed 301843662 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, catalog_sales 6 | where i_current_price between 22 and 22 + 30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + interval '60' day) 10 | and i_manufact_id in (678,964,918,849) 11 | and inv_quantity_on_hand between 100 and 500 12 | and cs_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query37.tpl 18 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q38.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query38.tpl and seed 1819994127 2 | select count(*) from ( 3 | select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | intersect 9 | select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212 + 11 14 
| intersect 15 | select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212 + 11 20 | ) hot_cust 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query38.tpl 24 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q39b.sql: -------------------------------------------------------------------------------- 1 | with inv as 2 | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 3 | ,stdev,mean, case mean when 0 then null else stdev/mean end cov 4 | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 5 | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean 6 | from inventory 7 | ,item 8 | ,warehouse 9 | ,date_dim 10 | where inv_item_sk = i_item_sk 11 | and inv_warehouse_sk = w_warehouse_sk 12 | and inv_date_sk = d_date_sk 13 | and d_year =1999 14 | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo 15 | where case mean when 0 then 0 else stdev/mean end > 1) 16 | select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov 17 | ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov 18 | from inv inv1,inv inv2 19 | where inv1.i_item_sk = inv2.i_item_sk 20 | and inv1.w_warehouse_sk = inv2.w_warehouse_sk 21 | and inv1.d_moy=4 22 | and inv2.d_moy=4+1 23 | and inv1.cov > 1.5 24 | order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov 25 | ,inv2.d_moy,inv2.mean, inv2.cov 26 | 27 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q40.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query40.tpl and seed 1819994127 2 | select 3 | w_state 4 | ,i_item_id 5 | 
,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) 6 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before 7 | ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) 8 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after 9 | from 10 | catalog_sales left outer join catalog_returns on 11 | (cs_order_number = cr_order_number 12 | and cs_item_sk = cr_item_sk) 13 | ,warehouse 14 | ,item 15 | ,date_dim 16 | where 17 | i_current_price between 0.99 and 1.49 18 | and i_item_sk = cs_item_sk 19 | and cs_warehouse_sk = w_warehouse_sk 20 | and cs_sold_date_sk = d_date_sk 21 | and d_date between (cast ('1998-04-08' as date) - interval '30' day) 22 | and (cast ('1998-04-08' as date) + interval '30' day) 23 | group by 24 | w_state,i_item_id 25 | order by w_state,i_item_id 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query40.tpl 29 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q42.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query42.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_category_id 4 | ,item.i_category 5 | ,sum(ss_ext_sales_price) 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_category_id 16 | ,item.i_category 17 | order by sum(ss_ext_sales_price) desc,dt.d_year 18 | ,item.i_category_id 19 | ,item.i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query42.tpl 23 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q45.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query45.tpl and seed 2031708268 2 | select ca_zip, ca_county, sum(ws_sales_price) 3 | from web_sales, customer, customer_address, date_dim, item 4 | where ws_bill_customer_sk = c_customer_sk 5 | and c_current_addr_sk = ca_address_sk 6 | and ws_item_sk = i_item_sk 7 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') 8 | or 9 | i_item_id in (select i_item_id 10 | from item 11 | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 12 | ) 13 | ) 14 | and ws_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip, ca_county 17 | order by ca_zip, ca_county 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query45.tpl 21 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q52.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query52.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) ext_price 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_brand 16 | ,item.i_brand_id 17 | order by dt.d_year 18 | ,ext_price desc 19 | ,brand_id 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query52.tpl 23 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q55.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query55.tpl and seed 2031708268 2 | 
select i_brand_id brand_id, i_brand brand, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=36 8 | and d_moy=12 9 | and d_year=2001 10 | group by i_brand, i_brand_id 11 | order by ext_price desc, i_brand_id 12 | limit 100 13 | 14 | -- end query 1 in stream 0 using template query55.tpl 15 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q6.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query6.tpl and seed 1819994127 2 | select a.ca_state state, count(*) cnt 3 | from customer_address a 4 | ,customer c 5 | ,store_sales s 6 | ,date_dim d 7 | ,item i 8 | where a.ca_address_sk = c.c_current_addr_sk 9 | and c.c_customer_sk = s.ss_customer_sk 10 | and s.ss_sold_date_sk = d.d_date_sk 11 | and s.ss_item_sk = i.i_item_sk 12 | and d.d_month_seq = 13 | (select distinct (d_month_seq) 14 | from date_dim 15 | where d_year = 2000 16 | and d_moy = 2 ) 17 | and i.i_current_price > 1.2 * 18 | (select avg(j.i_current_price) 19 | from item j 20 | where j.i_category = i.i_category) 21 | group by a.ca_state 22 | having count(*) >= 10 23 | order by cnt 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query6.tpl 27 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q65.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query65.tpl and seed 1819994127 2 | select 3 | s_store_name, 4 | i_item_desc, 5 | sc.revenue, 6 | i_current_price, 7 | i_wholesale_cost, 8 | i_brand 9 | from store, item, 10 | (select ss_store_sk, avg(revenue) as ave 11 | from 12 | (select ss_store_sk, ss_item_sk, 13 | sum(ss_sales_price) as revenue 14 | from store_sales, date_dim 
15 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 16 | group by ss_store_sk, ss_item_sk) sa 17 | group by ss_store_sk) sb, 18 | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue 19 | from store_sales, date_dim 20 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 21 | group by ss_store_sk, ss_item_sk) sc 22 | where sb.ss_store_sk = sc.ss_store_sk and 23 | sc.revenue <= 0.1 * sb.ave and 24 | s_store_sk = sc.ss_store_sk and 25 | i_item_sk = sc.ss_item_sk 26 | order by s_store_name, i_item_desc 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query65.tpl 30 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q7.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query7.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, item, promotion 8 | where ss_sold_date_sk = d_date_sk and 9 | ss_item_sk = i_item_sk and 10 | ss_cdemo_sk = cd_demo_sk and 11 | ss_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query7.tpl 22 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q79.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query79.tpl and seed 2031708268 2 | select 3 | c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit 4 | from 5 | (select 
ss_ticket_number 6 | ,ss_customer_sk 7 | ,store.s_city 8 | ,sum(ss_coupon_amt) amt 9 | ,sum(ss_net_profit) profit 10 | from store_sales,date_dim,store,household_demographics 11 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 12 | and store_sales.ss_store_sk = store.s_store_sk 13 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 14 | and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) 15 | and date_dim.d_dow = 1 16 | and date_dim.d_year in (1998,1998+1,1998+2) 17 | and store.s_number_employees between 200 and 295 18 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer 19 | where ss_customer_sk = c_customer_sk 20 | order by c_last_name,c_first_name,substr(s_city,1,30), profit 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query79.tpl 24 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q82.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query82.tpl and seed 55585014 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, store_sales 6 | where i_current_price between 30 and 30+30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + interval '60' day) 10 | and i_manufact_id in (437,129,727,663) 11 | and inv_quantity_on_hand between 100 and 500 12 | and ss_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query82.tpl 18 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q84.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 
using template query84.tpl and seed 1819994127 2 | select c_customer_id as customer_id 3 | ,c_last_name || ', ' || c_first_name as customername 4 | from customer 5 | ,customer_address 6 | ,customer_demographics 7 | ,household_demographics 8 | ,income_band 9 | ,store_returns 10 | where ca_city = 'Hopewell' 11 | and c_current_addr_sk = ca_address_sk 12 | and ib_lower_bound >= 32287 13 | and ib_upper_bound <= 32287 + 50000 14 | and ib_income_band_sk = hd_income_band_sk 15 | and cd_demo_sk = c_current_cdemo_sk 16 | and hd_demo_sk = c_current_hdemo_sk 17 | and sr_cdemo_sk = cd_demo_sk 18 | order by c_customer_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query84.tpl 22 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q86.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query86.tpl and seed 1819994127 2 | select 3 | sum(ws_net_paid) as total_sum 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ws_net_paid) desc) as rank_within_parent 11 | from 12 | web_sales 13 | ,date_dim d1 14 | ,item 15 | where 16 | d1.d_month_seq between 1212 and 1212+11 17 | and d1.d_date_sk = ws_sold_date_sk 18 | and i_item_sk = ws_item_sk 19 | group by rollup(i_category,i_class) 20 | order by 21 | lochierarchy desc, 22 | case when lochierarchy = 0 then i_category end, 23 | rank_within_parent 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query86.tpl 27 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q87.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template 
query87.tpl and seed 1819994127 2 | select count(*) 3 | from ((select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212+11) 8 | except 9 | (select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212+11) 14 | except 15 | (select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212+11) 20 | ) cool_cust 21 | 22 | 23 | -- end query 1 in stream 0 using template query87.tpl 24 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q92.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query92.tpl and seed 2031708268 2 | select 3 | sum(ws_ext_discount_amt) as "Excess Discount Amount" 4 | from 5 | web_sales 6 | ,item 7 | ,date_dim 8 | where 9 | i_manufact_id = 269 10 | and i_item_sk = ws_item_sk 11 | and d_date between timestamp '1998-03-18' and 12 | (cast('1998-03-18' as date) + interval '90' day) 13 | and d_date_sk = ws_sold_date_sk 14 | and ws_ext_discount_amt 15 | > ( 16 | SELECT 17 | 1.3 * avg(ws_ext_discount_amt) 18 | FROM 19 | web_sales 20 | ,date_dim 21 | WHERE 22 | ws_item_sk = i_item_sk 23 | and d_date between timestamp '1998-03-18' and 24 | (cast('1998-03-18' as date) + interval '90' day) 25 | and d_date_sk = ws_sold_date_sk 26 | ) 27 | order by sum(ws_ext_discount_amt) 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using 
template query92.tpl 31 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q93.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query93.tpl and seed 1200409435 2 | select ss_customer_sk 3 | ,sum(act_sales) sumsales 4 | from (select ss_item_sk 5 | ,ss_ticket_number 6 | ,ss_customer_sk 7 | ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price 8 | else (ss_quantity*ss_sales_price) end act_sales 9 | from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk 10 | and sr_ticket_number = ss_ticket_number) 11 | ,reason 12 | where sr_reason_sk = r_reason_sk 13 | and r_reason_desc = 'Did not like the warranty') t 14 | group by ss_customer_sk 15 | order by sumsales, ss_customer_sk 16 | limit 100 17 | 18 | -- end query 1 in stream 0 using template query93.tpl 19 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q94.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 2 | select 3 | count(distinct ws_order_number) as "order count" 4 | ,sum(ws_ext_ship_cost) as "total shipping cost" 5 | ,sum(ws_net_profit) as "total net profit" 6 | from 7 | web_sales ws1 8 | ,date_dim 9 | ,customer_address 10 | ,web_site 11 | where 12 | d_date between timestamp '1999-5-01' and 13 | (cast('1999-5-01' as date) + interval '60' day) 14 | and ws1.ws_ship_date_sk = d_date_sk 15 | and ws1.ws_ship_addr_sk = ca_address_sk 16 | and ca_state = 'TX' 17 | and ws1.ws_web_site_sk = web_site_sk 18 | and web_company_name = 'pri' 19 | and exists (select * 20 | from web_sales ws2 21 | where ws1.ws_order_number = ws2.ws_order_number 22 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 23 | and not exists(select * 24 | 
from web_returns wr1 25 | where ws1.ws_order_number = wr1.wr_order_number) 26 | order by count(distinct ws_order_number) 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query94.tpl 30 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q96.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query96.tpl and seed 1819994127 2 | select count(*) 3 | from store_sales 4 | ,household_demographics 5 | ,time_dim, store 6 | where ss_sold_time_sk = time_dim.t_time_sk 7 | and ss_hdemo_sk = household_demographics.hd_demo_sk 8 | and ss_store_sk = s_store_sk 9 | and time_dim.t_hour = 8 10 | and time_dim.t_minute >= 30 11 | and household_demographics.hd_dep_count = 5 12 | and store.s_store_name = 'ese' 13 | order by count(*) 14 | limit 100 15 | 16 | -- end query 1 in stream 0 using template query96.tpl 17 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q97.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query97.tpl and seed 1819994127 2 | with ssci as ( 3 | select ss_customer_sk customer_sk 4 | ,ss_item_sk item_sk 5 | from store_sales,date_dim 6 | where ss_sold_date_sk = d_date_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | group by ss_customer_sk 9 | ,ss_item_sk), 10 | csci as( 11 | select cs_bill_customer_sk customer_sk 12 | ,cs_item_sk item_sk 13 | from catalog_sales,date_dim 14 | where cs_sold_date_sk = d_date_sk 15 | and d_month_seq between 1212 and 1212 + 11 16 | group by cs_bill_customer_sk 17 | ,cs_item_sk) 18 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 19 | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 20 | 
,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 21 | from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk 22 | and ssci.item_sk = csci.item_sk) 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query97.tpl 26 | -------------------------------------------------------------------------------- /presto-tpcds/src/main/resources/queries/q98.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query98.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(ss_ext_sales_price) as itemrevenue 7 | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from 10 | store_sales 11 | ,item 12 | ,date_dim 13 | where 14 | ss_item_sk = i_item_sk 15 | and i_category in ('Jewelry', 'Sports', 'Books') 16 | and ss_sold_date_sk = d_date_sk 17 | and d_date between cast('2001-01-12' as date) 18 | and (cast('2001-01-12' as date) + interval '30' day) 19 | group by 20 | i_item_id 21 | ,i_item_desc 22 | ,i_category 23 | ,i_class 24 | ,i_current_price 25 | order by 26 | i_category 27 | ,i_class 28 | ,i_item_id 29 | ,i_item_desc 30 | ,revenueratio 31 | 32 | -- end query 1 in stream 0 using template query98.tpl 33 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q0.sql: -------------------------------------------------------------------------------- 1 | SELECT ss_item_sk, ss_ticket_number FROM store_sales LIMIT 1; 2 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q1.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query1.tpl and seed 2031708268 2 | with customer_total_return as 3 | (select 
sr_customer_sk as ctr_customer_sk 4 | ,sr_store_sk as ctr_store_sk 5 | ,sum(sr_fee) as ctr_total_return 6 | from store_returns 7 | ,date_dim 8 | where sr_returned_date_sk = d_date_sk 9 | and d_year =2000 10 | group by sr_customer_sk 11 | ,sr_store_sk) 12 | select c_customer_id 13 | from customer_total_return ctr1 14 | ,store 15 | ,customer 16 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 17 | from customer_total_return ctr2 18 | where ctr1.ctr_store_sk = ctr2.ctr_store_sk) 19 | and s_store_sk = ctr1.ctr_store_sk 20 | and s_state = 'NM' 21 | and ctr1.ctr_customer_sk = c_customer_sk 22 | order by c_customer_id 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query1.tpl 26 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q12.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query12.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,i_item_id 7 | ,sum(ws_ext_sales_price) as itemrevenue 8 | ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from 11 | web_sales 12 | ,item 13 | ,date_dim 14 | where 15 | ws_item_sk = i_item_sk 16 | and i_category in ('Jewelry', 'Sports', 'Books') 17 | and ws_sold_date_sk = d_date_sk 18 | and d_date between cast('2001-01-12' as date) 19 | and (cast('2001-01-12' as date) + interval '30' day) 20 | group by 21 | i_item_id 22 | ,i_item_desc 23 | ,i_category 24 | ,i_class 25 | ,i_current_price 26 | order by 27 | i_category 28 | ,i_class 29 | ,i_item_id 30 | ,i_item_desc 31 | ,revenueratio 32 | limit 100 33 | 34 | -- end query 1 in stream 0 using template query12.tpl 35 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q15.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query15.tpl and seed 1819994127 2 | select ca_zip 3 | ,sum(cs_sales_price) 4 | from catalog_sales 5 | ,customer 6 | ,customer_address 7 | ,date_dim 8 | where cs_bill_customer_sk = c_customer_sk 9 | and c_current_addr_sk = ca_address_sk 10 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 11 | '85392', '85460', '80348', '81792') 12 | or ca_state in ('CA','WA','GA') 13 | or cs_sales_price > 500) 14 | and cs_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip 17 | order by ca_zip 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query15.tpl 21 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q19.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query19.tpl and seed 1930872976 2 | select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item,customer,customer_address,store 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=7 8 | and d_moy=11 9 | and d_year=1999 10 | and ss_customer_sk = c_customer_sk 11 | and c_current_addr_sk = ca_address_sk 12 | and substr(ca_zip,1,5) <> substr(s_zip,1,5) 13 | and ss_store_sk = s_store_sk 14 | group by i_brand 15 | ,i_brand_id 16 | ,i_manufact_id 17 | ,i_manufact 18 | order by ext_price desc 19 | ,i_brand 20 | ,i_brand_id 21 | ,i_manufact_id 22 | ,i_manufact 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query19.tpl 26 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q20.sql: -------------------------------------------------------------------------------- 1 | -- start 
query 1 in stream 0 using template query20.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(cs_ext_sales_price) as itemrevenue 7 | ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from catalog_sales 10 | ,item 11 | ,date_dim 12 | where cs_item_sk = i_item_sk 13 | and i_category in ('Jewelry', 'Sports', 'Books') 14 | and cs_sold_date_sk = d_date_sk 15 | and d_date between cast('2001-01-12' as date) 16 | and (cast('2001-01-12' as date) + interval '30' day) 17 | group by i_item_id 18 | ,i_item_desc 19 | ,i_category 20 | ,i_class 21 | ,i_current_price 22 | order by i_category 23 | ,i_class 24 | ,i_item_id 25 | ,i_item_desc 26 | ,revenueratio 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query20.tpl 30 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q22.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query22.tpl and seed 1819994127 2 | select i_product_name 3 | ,i_brand 4 | ,i_class 5 | ,i_category 6 | ,avg(inv_quantity_on_hand) qoh 7 | from inventory 8 | ,date_dim 9 | ,item 10 | ,warehouse 11 | where inv_date_sk=d_date_sk 12 | and inv_item_sk=i_item_sk 13 | and inv_warehouse_sk = w_warehouse_sk 14 | and d_month_seq between 1212 and 1212 + 11 15 | group by rollup(i_product_name 16 | ,i_brand 17 | ,i_class 18 | ,i_category) 19 | order by qoh, i_product_name, i_brand, i_class, i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query22.tpl 23 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q26.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query26.tpl and seed 1930872976 2 | select i_item_id, 3 | 
avg(cs_quantity) agg1, 4 | avg(cs_list_price) agg2, 5 | avg(cs_coupon_amt) agg3, 6 | avg(cs_sales_price) agg4 7 | from catalog_sales, customer_demographics, date_dim, item, promotion 8 | where cs_sold_date_sk = d_date_sk and 9 | cs_item_sk = i_item_sk and 10 | cs_bill_cdemo_sk = cd_demo_sk and 11 | cs_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query26.tpl 22 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q27.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query27.tpl and seed 2017787633 2 | select i_item_id, 3 | s_state, grouping(s_state) g_state, 4 | avg(ss_quantity) agg1, 5 | avg(ss_list_price) agg2, 6 | avg(ss_coupon_amt) agg3, 7 | avg(ss_sales_price) agg4 8 | from store_sales, customer_demographics, date_dim, store, item 9 | where ss_sold_date_sk = d_date_sk and 10 | ss_item_sk = i_item_sk and 11 | ss_store_sk = s_store_sk and 12 | ss_cdemo_sk = cd_demo_sk and 13 | cd_gender = 'M' and 14 | cd_marital_status = 'U' and 15 | cd_education_status = '2 yr Degree' and 16 | d_year = 2001 and 17 | s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') 18 | group by rollup (i_item_id, s_state) 19 | order by i_item_id 20 | ,s_state 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query27.tpl 24 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q3.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query3.tpl and seed 2031708268 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | 
,item.i_brand brand 5 | ,sum(ss_ext_sales_price) sum_agg 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manufact_id = 436 12 | and dt.d_moy=12 13 | group by dt.d_year 14 | ,item.i_brand 15 | ,item.i_brand_id 16 | order by dt.d_year 17 | ,sum_agg desc 18 | ,brand_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query3.tpl 22 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q32.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query32.tpl and seed 2031708268 2 | select sum(cs_ext_discount_amt) as `excess discount amount` 3 | from 4 | catalog_sales 5 | ,item 6 | ,date_dim 7 | where 8 | i_manufact_id = 269 9 | and i_item_sk = cs_item_sk 10 | and d_date between '1998-03-18' and 11 | (cast('1998-03-18' as date) + interval '90' day) 12 | and d_date_sk = cs_sold_date_sk 13 | and cs_ext_discount_amt 14 | > ( 15 | select 16 | 1.3 * avg(cs_ext_discount_amt) 17 | from 18 | catalog_sales 19 | ,date_dim 20 | where 21 | cs_item_sk = i_item_sk 22 | and d_date between '1998-03-18' and 23 | (cast('1998-03-18' as date) + interval '90' day) 24 | and d_date_sk = cs_sold_date_sk 25 | ) 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query32.tpl 29 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q36.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query36.tpl and seed 1544728811 2 | select 3 | sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case 
when grouping(i_class) = 0 then i_category end 10 | order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent 11 | from 12 | store_sales 13 | ,date_dim d1 14 | ,item 15 | ,store 16 | where 17 | d1.d_year = 1999 18 | and d1.d_date_sk = ss_sold_date_sk 19 | and i_item_sk = ss_item_sk 20 | and s_store_sk = ss_store_sk 21 | and s_state in ('SD','FL','MI','LA', 22 | 'MO','SC','AL','GA') 23 | group by rollup(i_category,i_class) 24 | order by 25 | lochierarchy desc 26 | ,case when lochierarchy = 0 then i_category end 27 | ,rank_within_parent 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query36.tpl 31 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q37.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query37.tpl and seed 301843662 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, catalog_sales 6 | where i_current_price between 22 and 22 + 30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + interval '60' day) 10 | and i_manufact_id in (678,964,918,849) 11 | and inv_quantity_on_hand between 100 and 500 12 | and cs_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query37.tpl 18 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q38.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query38.tpl and seed 1819994127 2 | select count(*) from ( 3 | select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = 
date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | intersect 9 | select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212 + 11 14 | intersect 15 | select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212 + 11 20 | ) hot_cust 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query38.tpl 24 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q39b.sql: -------------------------------------------------------------------------------- 1 | with inv as 2 | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 3 | ,stdev,mean, case mean when 0 then null else stdev/mean end cov 4 | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 5 | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean 6 | from inventory 7 | ,item 8 | ,warehouse 9 | ,date_dim 10 | where inv_item_sk = i_item_sk 11 | and inv_warehouse_sk = w_warehouse_sk 12 | and inv_date_sk = d_date_sk 13 | and d_year =1999 14 | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo 15 | where case mean when 0 then 0 else stdev/mean end > 1) 16 | select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov 17 | ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov 18 | from inv inv1,inv inv2 19 | where inv1.i_item_sk = inv2.i_item_sk 20 | and inv1.w_warehouse_sk = inv2.w_warehouse_sk 21 | and inv1.d_moy=4 22 | and inv2.d_moy=4+1 23 | and inv1.cov > 1.5 24 | order by 
inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov 25 | ,inv2.d_moy,inv2.mean, inv2.cov 26 | 27 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q40.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query40.tpl and seed 1819994127 2 | select 3 | w_state 4 | ,i_item_id 5 | ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) 6 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before 7 | ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) 8 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after 9 | from 10 | catalog_sales left outer join catalog_returns on 11 | (cs_order_number = cr_order_number 12 | and cs_item_sk = cr_item_sk) 13 | ,warehouse 14 | ,item 15 | ,date_dim 16 | where 17 | i_current_price between 0.99 and 1.49 18 | and i_item_sk = cs_item_sk 19 | and cs_warehouse_sk = w_warehouse_sk 20 | and cs_sold_date_sk = d_date_sk 21 | and d_date between (cast ('1998-04-08' as date) - interval '30' day) 22 | and (cast ('1998-04-08' as date) + interval '30' day) 23 | group by 24 | w_state,i_item_id 25 | order by w_state,i_item_id 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query40.tpl 29 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q42.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query42.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_category_id 4 | ,item.i_category 5 | ,sum(ss_ext_sales_price) 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | 
group by dt.d_year 15 | ,item.i_category_id 16 | ,item.i_category 17 | order by sum(ss_ext_sales_price) desc,dt.d_year 18 | ,item.i_category_id 19 | ,item.i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query42.tpl 23 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q45.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query45.tpl and seed 2031708268 2 | select ca_zip, ca_county, sum(ws_sales_price) 3 | from web_sales, customer, customer_address, date_dim, item 4 | where ws_bill_customer_sk = c_customer_sk 5 | and c_current_addr_sk = ca_address_sk 6 | and ws_item_sk = i_item_sk 7 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') 8 | or 9 | i_item_id in (select i_item_id 10 | from item 11 | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 12 | ) 13 | ) 14 | and ws_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip, ca_county 17 | order by ca_zip, ca_county 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query45.tpl 21 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q52.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query52.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) ext_price 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_brand 16 | ,item.i_brand_id 17 | order by dt.d_year 18 | ,ext_price desc 19 | ,brand_id 20 | 
limit 100 21 | 22 | -- end query 1 in stream 0 using template query52.tpl 23 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q55.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query55.tpl and seed 2031708268 2 | select i_brand_id brand_id, i_brand brand, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=36 8 | and d_moy=12 9 | and d_year=2001 10 | group by i_brand, i_brand_id 11 | order by ext_price desc, i_brand_id 12 | limit 100 13 | 14 | -- end query 1 in stream 0 using template query55.tpl 15 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q6.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query6.tpl and seed 1819994127 2 | select a.ca_state state, count(*) cnt 3 | from customer_address a 4 | ,customer c 5 | ,store_sales s 6 | ,date_dim d 7 | ,item i 8 | where a.ca_address_sk = c.c_current_addr_sk 9 | and c.c_customer_sk = s.ss_customer_sk 10 | and s.ss_sold_date_sk = d.d_date_sk 11 | and s.ss_item_sk = i.i_item_sk 12 | and d.d_month_seq = 13 | (select distinct (d_month_seq) 14 | from date_dim 15 | where d_year = 2000 16 | and d_moy = 2 ) 17 | and i.i_current_price > 1.2 * 18 | (select avg(j.i_current_price) 19 | from item j 20 | where j.i_category = i.i_category) 21 | group by a.ca_state 22 | having count(*) >= 10 23 | order by cnt 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query6.tpl 27 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q65.sql: -------------------------------------------------------------------------------- 1 | -- start 
query 1 in stream 0 using template query65.tpl and seed 1819994127 2 | select 3 | s_store_name, 4 | i_item_desc, 5 | sc.revenue, 6 | i_current_price, 7 | i_wholesale_cost, 8 | i_brand 9 | from store, item, 10 | (select ss_store_sk, avg(revenue) as ave 11 | from 12 | (select ss_store_sk, ss_item_sk, 13 | sum(ss_sales_price) as revenue 14 | from store_sales, date_dim 15 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 16 | group by ss_store_sk, ss_item_sk) sa 17 | group by ss_store_sk) sb, 18 | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue 19 | from store_sales, date_dim 20 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 21 | group by ss_store_sk, ss_item_sk) sc 22 | where sb.ss_store_sk = sc.ss_store_sk and 23 | sc.revenue <= 0.1 * sb.ave and 24 | s_store_sk = sc.ss_store_sk and 25 | i_item_sk = sc.ss_item_sk 26 | order by s_store_name, i_item_desc 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query65.tpl 30 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q7.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query7.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, item, promotion 8 | where ss_sold_date_sk = d_date_sk and 9 | ss_item_sk = i_item_sk and 10 | ss_cdemo_sk = cd_demo_sk and 11 | ss_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query7.tpl 22 | 
-------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q79.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query79.tpl and seed 2031708268 2 | select 3 | c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit 4 | from 5 | (select ss_ticket_number 6 | ,ss_customer_sk 7 | ,store.s_city 8 | ,sum(ss_coupon_amt) amt 9 | ,sum(ss_net_profit) profit 10 | from store_sales,date_dim,store,household_demographics 11 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 12 | and store_sales.ss_store_sk = store.s_store_sk 13 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 14 | and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) 15 | and date_dim.d_dow = 1 16 | and date_dim.d_year in (1998,1998+1,1998+2) 17 | and store.s_number_employees between 200 and 295 18 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer 19 | where ss_customer_sk = c_customer_sk 20 | order by c_last_name,c_first_name,substr(s_city,1,30), profit 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query79.tpl 24 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q82.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query82.tpl and seed 55585014 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, store_sales 6 | where i_current_price between 30 and 30+30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + interval '60' day) 10 | and i_manufact_id in (437,129,727,663) 11 | and inv_quantity_on_hand between 100 and 500 12 | and ss_item_sk = i_item_sk 13 | 
group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query82.tpl 18 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q84.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query84.tpl and seed 1819994127 2 | select c_customer_id as customer_id 3 | ,c_last_name || ', ' || c_first_name as customername 4 | from customer 5 | ,customer_address 6 | ,customer_demographics 7 | ,household_demographics 8 | ,income_band 9 | ,store_returns 10 | where ca_city = 'Hopewell' 11 | and c_current_addr_sk = ca_address_sk 12 | and ib_lower_bound >= 32287 13 | and ib_upper_bound <= 32287 + 50000 14 | and ib_income_band_sk = hd_income_band_sk 15 | and cd_demo_sk = c_current_cdemo_sk 16 | and hd_demo_sk = c_current_hdemo_sk 17 | and sr_cdemo_sk = cd_demo_sk 18 | order by c_customer_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query84.tpl 22 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q86.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query86.tpl and seed 1819994127 2 | select 3 | sum(ws_net_paid) as total_sum 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ws_net_paid) desc) as rank_within_parent 11 | from 12 | web_sales 13 | ,date_dim d1 14 | ,item 15 | where 16 | d1.d_month_seq between 1212 and 1212+11 17 | and d1.d_date_sk = ws_sold_date_sk 18 | and i_item_sk = ws_item_sk 19 | group by rollup(i_category,i_class) 20 | order by 21 | lochierarchy desc, 22 | case when 
lochierarchy = 0 then i_category end, 23 | rank_within_parent 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query86.tpl 27 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q87.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query87.tpl and seed 1819994127 2 | select count(*) 3 | from ((select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212+11) 8 | except 9 | (select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212+11) 14 | except 15 | (select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212+11) 20 | ) cool_cust 21 | 22 | 23 | -- end query 1 in stream 0 using template query87.tpl 24 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q90.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query90.tpl and seed 2031708268 2 | select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio 3 | from ( select count(*) amc 4 | from web_sales, household_demographics , time_dim, web_page 5 | where ws_sold_time_sk = time_dim.t_time_sk 6 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 7 | and ws_web_page_sk = web_page.wp_web_page_sk 8 
| and time_dim.t_hour between 6 and 6+1 9 | and household_demographics.hd_dep_count = 8 10 | and web_page.wp_char_count between 5000 and 5200) `at`, 11 | ( select count(*) pmc 12 | from web_sales, household_demographics , time_dim, web_page 13 | where ws_sold_time_sk = time_dim.t_time_sk 14 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 15 | and ws_web_page_sk = web_page.wp_web_page_sk 16 | and time_dim.t_hour between 14 and 14+1 17 | and household_demographics.hd_dep_count = 8 18 | and web_page.wp_char_count between 5000 and 5200) pt 19 | order by am_pm_ratio 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query90.tpl 23 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q92.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query92.tpl and seed 2031708268 2 | select 3 | sum(ws_ext_discount_amt) as `Excess Discount Amount` 4 | from 5 | web_sales 6 | ,item 7 | ,date_dim 8 | where 9 | i_manufact_id = 269 10 | and i_item_sk = ws_item_sk 11 | and d_date between '1998-03-18' and 12 | (cast('1998-03-18' as date) + interval '90' day) 13 | and d_date_sk = ws_sold_date_sk 14 | and ws_ext_discount_amt 15 | > ( 16 | SELECT 17 | 1.3 * avg(ws_ext_discount_amt) 18 | FROM 19 | web_sales 20 | ,date_dim 21 | WHERE 22 | ws_item_sk = i_item_sk 23 | and d_date between '1998-03-18' and 24 | (cast('1998-03-18' as date) + interval '90' day) 25 | and d_date_sk = ws_sold_date_sk 26 | ) 27 | order by sum(ws_ext_discount_amt) 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query92.tpl 31 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q93.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query93.tpl and seed 1200409435 2 | select 
ss_customer_sk 3 | ,sum(act_sales) sumsales 4 | from (select ss_item_sk 5 | ,ss_ticket_number 6 | ,ss_customer_sk 7 | ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price 8 | else (ss_quantity*ss_sales_price) end act_sales 9 | from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk 10 | and sr_ticket_number = ss_ticket_number) 11 | ,reason 12 | where sr_reason_sk = r_reason_sk 13 | and r_reason_desc = 'Did not like the warranty') t 14 | group by ss_customer_sk 15 | order by sumsales, ss_customer_sk 16 | limit 100 17 | 18 | -- end query 1 in stream 0 using template query93.tpl 19 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q94.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 2 | select 3 | count(distinct ws_order_number) as `order count` 4 | ,sum(ws_ext_ship_cost) as `total shipping cost` 5 | ,sum(ws_net_profit) as `total net profit` 6 | from 7 | web_sales ws1 8 | ,date_dim 9 | ,customer_address 10 | ,web_site 11 | where 12 | d_date between '1999-5-01' and 13 | (cast('1999-5-01' as date) + interval '60' day) 14 | and ws1.ws_ship_date_sk = d_date_sk 15 | and ws1.ws_ship_addr_sk = ca_address_sk 16 | and ca_state = 'TX' 17 | and ws1.ws_web_site_sk = web_site_sk 18 | and web_company_name = 'pri' 19 | and exists (select * 20 | from web_sales ws2 21 | where ws1.ws_order_number = ws2.ws_order_number 22 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 23 | and not exists(select * 24 | from web_returns wr1 25 | where ws1.ws_order_number = wr1.wr_order_number) 26 | order by count(distinct ws_order_number) 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query94.tpl 30 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q96.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query96.tpl and seed 1819994127 2 | select count(*) 3 | from store_sales 4 | ,household_demographics 5 | ,time_dim, store 6 | where ss_sold_time_sk = time_dim.t_time_sk 7 | and ss_hdemo_sk = household_demographics.hd_demo_sk 8 | and ss_store_sk = s_store_sk 9 | and time_dim.t_hour = 8 10 | and time_dim.t_minute >= 30 11 | and household_demographics.hd_dep_count = 5 12 | and store.s_store_name = 'ese' 13 | order by count(*) 14 | limit 100 15 | 16 | -- end query 1 in stream 0 using template query96.tpl 17 | -------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q97.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query97.tpl and seed 1819994127 2 | with ssci as ( 3 | select ss_customer_sk customer_sk 4 | ,ss_item_sk item_sk 5 | from store_sales,date_dim 6 | where ss_sold_date_sk = d_date_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | group by ss_customer_sk 9 | ,ss_item_sk), 10 | csci as( 11 | select cs_bill_customer_sk customer_sk 12 | ,cs_item_sk item_sk 13 | from catalog_sales,date_dim 14 | where cs_sold_date_sk = d_date_sk 15 | and d_month_seq between 1212 and 1212 + 11 16 | group by cs_bill_customer_sk 17 | ,cs_item_sk) 18 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 19 | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 20 | ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 21 | from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk 22 | and ssci.item_sk = csci.item_sk) 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query97.tpl 26 | 
-------------------------------------------------------------------------------- /spark-hudi/src/main/resources/queries/q98.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query98.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(ss_ext_sales_price) as itemrevenue 7 | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from 10 | store_sales 11 | ,item 12 | ,date_dim 13 | where 14 | ss_item_sk = i_item_sk 15 | and i_category in ('Jewelry', 'Sports', 'Books') 16 | and ss_sold_date_sk = d_date_sk 17 | and d_date between cast('2001-01-12' as date) 18 | and (cast('2001-01-12' as date) + interval '30' day) 19 | group by 20 | i_item_id 21 | ,i_item_desc 22 | ,i_category 23 | ,i_class 24 | ,i_current_price 25 | order by 26 | i_category 27 | ,i_class 28 | ,i_item_id 29 | ,i_item_desc 30 | ,revenueratio 31 | 32 | -- end query 1 in stream 0 using template query98.tpl 33 | -------------------------------------------------------------------------------- /spark-hudi/src/main/scala/spark3/sql/CliOptions.java: -------------------------------------------------------------------------------- 1 | package spark3.sql; 2 | 3 | import org.apache.hudi.com.beust.jcommander.Parameter; 4 | 5 | import java.io.Serializable; 6 | /** Serializable holder for command-line options declared with JCommander {@code @Parameter} annotations. */ 7 | public class CliOptions implements Serializable { 8 | /* Required option -q/--queries. */ 9 | @Parameter(names = {"-q", "--queries"}, 10 | description = "sql query names. If the value is 'all', all queries will be executed.", 11 | required = true) 12 | public String queries; 13 | /* Required option -d/--database; the help text previously repeated the --queries description. */ 14 | @Parameter(names = {"-d", "--database"}, 15 | description =
"name of the database to use.", 16 | required = true) 17 | public String database; 18 | } 19 | -------------------------------------------------------------------------------- /spark-iceberg/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | benchmark-parent 5 | com.github.deadwind4 6 | 1.0-SNAPSHOT 7 | 8 | 4.0.0 9 | spark-iceberg 10 | Archetype - spark-iceberg 11 | http://maven.apache.org 12 | 13 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q1.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query1.tpl and seed 2031708268 2 | with customer_total_return as 3 | (select sr_customer_sk as ctr_customer_sk 4 | ,sr_store_sk as ctr_store_sk 5 | ,sum(sr_fee) as ctr_total_return 6 | from store_returns 7 | ,date_dim 8 | where sr_returned_date_sk = d_date_sk 9 | and d_year =2000 10 | group by sr_customer_sk 11 | ,sr_store_sk) 12 | select c_customer_id 13 | from customer_total_return ctr1 14 | ,store 15 | ,customer 16 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 17 | from customer_total_return ctr2 18 | where ctr1.ctr_store_sk = ctr2.ctr_store_sk) 19 | and s_store_sk = ctr1.ctr_store_sk 20 | and s_state = 'NM' 21 | and ctr1.ctr_customer_sk = c_customer_sk 22 | order by c_customer_id 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query1.tpl 26 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q12.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query12.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,i_item_id 7 | ,sum(ws_ext_sales_price) as itemrevenue 8 | 
,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from 11 | web_sales 12 | ,item 13 | ,date_dim 14 | where 15 | ws_item_sk = i_item_sk 16 | and i_category in ('Jewelry', 'Sports', 'Books') 17 | and ws_sold_date_sk = d_date_sk 18 | and d_date between cast('2001-01-12' as date) 19 | and (cast('2001-01-12' as date) + interval '30' day) 20 | group by 21 | i_item_id 22 | ,i_item_desc 23 | ,i_category 24 | ,i_class 25 | ,i_current_price 26 | order by 27 | i_category 28 | ,i_class 29 | ,i_item_id 30 | ,i_item_desc 31 | ,revenueratio 32 | limit 100 33 | 34 | -- end query 1 in stream 0 using template query12.tpl 35 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q15.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query15.tpl and seed 1819994127 2 | select ca_zip 3 | ,sum(cs_sales_price) 4 | from catalog_sales 5 | ,customer 6 | ,customer_address 7 | ,date_dim 8 | where cs_bill_customer_sk = c_customer_sk 9 | and c_current_addr_sk = ca_address_sk 10 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 11 | '85392', '85460', '80348', '81792') 12 | or ca_state in ('CA','WA','GA') 13 | or cs_sales_price > 500) 14 | and cs_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip 17 | order by ca_zip 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query15.tpl 21 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q19.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query19.tpl and seed 1930872976 2 | select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, 
item,customer,customer_address,store 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=7 8 | and d_moy=11 9 | and d_year=1999 10 | and ss_customer_sk = c_customer_sk 11 | and c_current_addr_sk = ca_address_sk 12 | and substr(ca_zip,1,5) <> substr(s_zip,1,5) 13 | and ss_store_sk = s_store_sk 14 | group by i_brand 15 | ,i_brand_id 16 | ,i_manufact_id 17 | ,i_manufact 18 | order by ext_price desc 19 | ,i_brand 20 | ,i_brand_id 21 | ,i_manufact_id 22 | ,i_manufact 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query19.tpl 26 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q20.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query20.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(cs_ext_sales_price) as itemrevenue 7 | ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from catalog_sales 10 | ,item 11 | ,date_dim 12 | where cs_item_sk = i_item_sk 13 | and i_category in ('Jewelry', 'Sports', 'Books') 14 | and cs_sold_date_sk = d_date_sk 15 | and d_date between cast('2001-01-12' as date) 16 | and (cast('2001-01-12' as date) + interval '30' day) 17 | group by i_item_id 18 | ,i_item_desc 19 | ,i_category 20 | ,i_class 21 | ,i_current_price 22 | order by i_category 23 | ,i_class 24 | ,i_item_id 25 | ,i_item_desc 26 | ,revenueratio 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query20.tpl 30 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q22.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query22.tpl and seed 1819994127 2 | select i_product_name 3 | ,i_brand 4 | 
,i_class 5 | ,i_category 6 | ,avg(inv_quantity_on_hand) qoh 7 | from inventory 8 | ,date_dim 9 | ,item 10 | ,warehouse 11 | where inv_date_sk=d_date_sk 12 | and inv_item_sk=i_item_sk 13 | and inv_warehouse_sk = w_warehouse_sk 14 | and d_month_seq between 1212 and 1212 + 11 15 | group by rollup(i_product_name 16 | ,i_brand 17 | ,i_class 18 | ,i_category) 19 | order by qoh, i_product_name, i_brand, i_class, i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query22.tpl 23 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q26.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query26.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(cs_quantity) agg1, 4 | avg(cs_list_price) agg2, 5 | avg(cs_coupon_amt) agg3, 6 | avg(cs_sales_price) agg4 7 | from catalog_sales, customer_demographics, date_dim, item, promotion 8 | where cs_sold_date_sk = d_date_sk and 9 | cs_item_sk = i_item_sk and 10 | cs_bill_cdemo_sk = cd_demo_sk and 11 | cs_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query26.tpl 22 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q27.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query27.tpl and seed 2017787633 2 | select i_item_id, 3 | s_state, grouping(s_state) g_state, 4 | avg(ss_quantity) agg1, 5 | avg(ss_list_price) agg2, 6 | avg(ss_coupon_amt) agg3, 7 | avg(ss_sales_price) agg4 8 | from store_sales, customer_demographics, date_dim, store, item 9 | 
where ss_sold_date_sk = d_date_sk and 10 | ss_item_sk = i_item_sk and 11 | ss_store_sk = s_store_sk and 12 | ss_cdemo_sk = cd_demo_sk and 13 | cd_gender = 'M' and 14 | cd_marital_status = 'U' and 15 | cd_education_status = '2 yr Degree' and 16 | d_year = 2001 and 17 | s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') 18 | group by rollup (i_item_id, s_state) 19 | order by i_item_id 20 | ,s_state 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query27.tpl 24 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q3.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query3.tpl and seed 2031708268 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) sum_agg 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manufact_id = 436 12 | and dt.d_moy=12 13 | group by dt.d_year 14 | ,item.i_brand 15 | ,item.i_brand_id 16 | order by dt.d_year 17 | ,sum_agg desc 18 | ,brand_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query3.tpl 22 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q32.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query32.tpl and seed 2031708268 2 | select sum(cs_ext_discount_amt) as "excess discount amount" 3 | from 4 | catalog_sales 5 | ,item 6 | ,date_dim 7 | where 8 | i_manufact_id = 269 9 | and i_item_sk = cs_item_sk 10 | and d_date between timestamp '1998-03-18' and 11 | (cast('1998-03-18' as date) + interval '90' day) 12 | and d_date_sk = cs_sold_date_sk 13 | and cs_ext_discount_amt 14 | > ( 15 | select 16 | 1.3 * avg(cs_ext_discount_amt) 17 | 
from 18 | catalog_sales 19 | ,date_dim 20 | where 21 | cs_item_sk = i_item_sk 22 | and d_date between timestamp '1998-03-18' and 23 | (cast('1998-03-18' as date) + interval '90' day) 24 | and d_date_sk = cs_sold_date_sk 25 | ) 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query32.tpl 29 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q36.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query36.tpl and seed 1544728811 2 | select 3 | sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent 11 | from 12 | store_sales 13 | ,date_dim d1 14 | ,item 15 | ,store 16 | where 17 | d1.d_year = 1999 18 | and d1.d_date_sk = ss_sold_date_sk 19 | and i_item_sk = ss_item_sk 20 | and s_store_sk = ss_store_sk 21 | and s_state in ('SD','FL','MI','LA', 22 | 'MO','SC','AL','GA') 23 | group by rollup(i_category,i_class) 24 | order by 25 | lochierarchy desc 26 | ,case when lochierarchy = 0 then i_category end 27 | ,rank_within_parent 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query36.tpl 31 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q37.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query37.tpl and seed 301843662 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, catalog_sales 6 | where i_current_price between 22 and 22 + 30 7 | and inv_item_sk = i_item_sk 8 | and 
d_date_sk=inv_date_sk 9 | and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + interval '60' day) 10 | and i_manufact_id in (678,964,918,849) 11 | and inv_quantity_on_hand between 100 and 500 12 | and cs_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query37.tpl 18 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q38.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query38.tpl and seed 1819994127 2 | select count(*) from ( 3 | select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | intersect 9 | select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212 + 11 14 | intersect 15 | select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212 + 11 20 | ) hot_cust 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query38.tpl 24 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q39b.sql: -------------------------------------------------------------------------------- 1 | with inv as 2 | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 3 | ,stdev,mean, case mean when 0 then null else 
stdev/mean end cov 4 | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 5 | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean 6 | from inventory 7 | ,item 8 | ,warehouse 9 | ,date_dim 10 | where inv_item_sk = i_item_sk 11 | and inv_warehouse_sk = w_warehouse_sk 12 | and inv_date_sk = d_date_sk 13 | and d_year =1999 14 | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo 15 | where case mean when 0 then 0 else stdev/mean end > 1) 16 | select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov 17 | ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov 18 | from inv inv1,inv inv2 19 | where inv1.i_item_sk = inv2.i_item_sk 20 | and inv1.w_warehouse_sk = inv2.w_warehouse_sk 21 | and inv1.d_moy=4 22 | and inv2.d_moy=4+1 23 | and inv1.cov > 1.5 24 | order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov 25 | ,inv2.d_moy,inv2.mean, inv2.cov 26 | 27 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q40.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query40.tpl and seed 1819994127 2 | select 3 | w_state 4 | ,i_item_id 5 | ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) 6 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before 7 | ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) 8 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after 9 | from 10 | catalog_sales left outer join catalog_returns on 11 | (cs_order_number = cr_order_number 12 | and cs_item_sk = cr_item_sk) 13 | ,warehouse 14 | ,item 15 | ,date_dim 16 | where 17 | i_current_price between 0.99 and 1.49 18 | and i_item_sk = cs_item_sk 19 | and cs_warehouse_sk = w_warehouse_sk 20 | and cs_sold_date_sk = d_date_sk 21 | and d_date between (cast ('1998-04-08' as date) - interval '30' 
day) 22 | and (cast ('1998-04-08' as date) + interval '30' day) 23 | group by 24 | w_state,i_item_id 25 | order by w_state,i_item_id 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query40.tpl 29 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q42.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query42.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_category_id 4 | ,item.i_category 5 | ,sum(ss_ext_sales_price) 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_category_id 16 | ,item.i_category 17 | order by sum(ss_ext_sales_price) desc,dt.d_year 18 | ,item.i_category_id 19 | ,item.i_category 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query42.tpl 23 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q45.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query45.tpl and seed 2031708268 2 | select ca_zip, ca_county, sum(ws_sales_price) 3 | from web_sales, customer, customer_address, date_dim, item 4 | where ws_bill_customer_sk = c_customer_sk 5 | and c_current_addr_sk = ca_address_sk 6 | and ws_item_sk = i_item_sk 7 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') 8 | or 9 | i_item_id in (select i_item_id 10 | from item 11 | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 12 | ) 13 | ) 14 | and ws_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip, ca_county 17 | order by ca_zip, ca_county 18 | limit 
100 19 | 20 | -- end query 1 in stream 0 using template query45.tpl 21 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q52.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query52.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) ext_price 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_brand 16 | ,item.i_brand_id 17 | order by dt.d_year 18 | ,ext_price desc 19 | ,brand_id 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query52.tpl 23 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q55.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query55.tpl and seed 2031708268 2 | select i_brand_id brand_id, i_brand brand, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=36 8 | and d_moy=12 9 | and d_year=2001 10 | group by i_brand, i_brand_id 11 | order by ext_price desc, i_brand_id 12 | limit 100 13 | 14 | -- end query 1 in stream 0 using template query55.tpl 15 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q6.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query6.tpl and seed 1819994127 2 | select a.ca_state state, count(*) cnt 3 | from customer_address a 4 | ,customer c 5 | 
,store_sales s 6 | ,date_dim d 7 | ,item i 8 | where a.ca_address_sk = c.c_current_addr_sk 9 | and c.c_customer_sk = s.ss_customer_sk 10 | and s.ss_sold_date_sk = d.d_date_sk 11 | and s.ss_item_sk = i.i_item_sk 12 | and d.d_month_seq = 13 | (select distinct (d_month_seq) 14 | from date_dim 15 | where d_year = 2000 16 | and d_moy = 2 ) 17 | and i.i_current_price > 1.2 * 18 | (select avg(j.i_current_price) 19 | from item j 20 | where j.i_category = i.i_category) 21 | group by a.ca_state 22 | having count(*) >= 10 23 | order by cnt 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query6.tpl 27 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q65.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query65.tpl and seed 1819994127 2 | select 3 | s_store_name, 4 | i_item_desc, 5 | sc.revenue, 6 | i_current_price, 7 | i_wholesale_cost, 8 | i_brand 9 | from store, item, 10 | (select ss_store_sk, avg(revenue) as ave 11 | from 12 | (select ss_store_sk, ss_item_sk, 13 | sum(ss_sales_price) as revenue 14 | from store_sales, date_dim 15 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 16 | group by ss_store_sk, ss_item_sk) sa 17 | group by ss_store_sk) sb, 18 | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue 19 | from store_sales, date_dim 20 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 21 | group by ss_store_sk, ss_item_sk) sc 22 | where sb.ss_store_sk = sc.ss_store_sk and 23 | sc.revenue <= 0.1 * sb.ave and 24 | s_store_sk = sc.ss_store_sk and 25 | i_item_sk = sc.ss_item_sk 26 | order by s_store_name, i_item_desc 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query65.tpl 30 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q7.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query7.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, item, promotion 8 | where ss_sold_date_sk = d_date_sk and 9 | ss_item_sk = i_item_sk and 10 | ss_cdemo_sk = cd_demo_sk and 11 | ss_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query7.tpl 22 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q79.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query79.tpl and seed 2031708268 2 | select 3 | c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit 4 | from 5 | (select ss_ticket_number 6 | ,ss_customer_sk 7 | ,store.s_city 8 | ,sum(ss_coupon_amt) amt 9 | ,sum(ss_net_profit) profit 10 | from store_sales,date_dim,store,household_demographics 11 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 12 | and store_sales.ss_store_sk = store.s_store_sk 13 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 14 | and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) 15 | and date_dim.d_dow = 1 16 | and date_dim.d_year in (1998,1998+1,1998+2) 17 | and store.s_number_employees between 200 and 295 18 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer 19 | where ss_customer_sk = c_customer_sk 20 | order by c_last_name,c_first_name,substr(s_city,1,30), profit 21 | 
limit 100 22 | 23 | -- end query 1 in stream 0 using template query79.tpl 24 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q82.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query82.tpl and seed 55585014 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, store_sales 6 | where i_current_price between 30 and 30+30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + interval '60' day) 10 | and i_manufact_id in (437,129,727,663) 11 | and inv_quantity_on_hand between 100 and 500 12 | and ss_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query82.tpl 18 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q84.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query84.tpl and seed 1819994127 2 | select c_customer_id as customer_id 3 | ,c_last_name || ', ' || c_first_name as customername 4 | from customer 5 | ,customer_address 6 | ,customer_demographics 7 | ,household_demographics 8 | ,income_band 9 | ,store_returns 10 | where ca_city = 'Hopewell' 11 | and c_current_addr_sk = ca_address_sk 12 | and ib_lower_bound >= 32287 13 | and ib_upper_bound <= 32287 + 50000 14 | and ib_income_band_sk = hd_income_band_sk 15 | and cd_demo_sk = c_current_cdemo_sk 16 | and hd_demo_sk = c_current_hdemo_sk 17 | and sr_cdemo_sk = cd_demo_sk 18 | order by c_customer_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query84.tpl 22 | -------------------------------------------------------------------------------- 
/trino-tpcds/src/main/resources/queries/q86.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query86.tpl and seed 1819994127 -- summary: total ws_net_paid over rollup(i_category, i_class) for web sales with d_month_seq 1212 through 1223; lochierarchy is the sum of the two grouping() flags and rank_within_parent ranks rows by descending total inside each rollup level (per category when i_class is not rolled up); first 100 rows 2 | select 3 | sum(ws_net_paid) as total_sum 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ws_net_paid) desc) as rank_within_parent 11 | from 12 | web_sales 13 | ,date_dim d1 14 | ,item 15 | where 16 | d1.d_month_seq between 1212 and 1212+11 17 | and d1.d_date_sk = ws_sold_date_sk 18 | and i_item_sk = ws_item_sk 19 | group by rollup(i_category,i_class) 20 | order by 21 | lochierarchy desc, 22 | case when lochierarchy = 0 then i_category end, 23 | rank_within_parent 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query86.tpl 27 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q87.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query87.tpl and seed 1819994127 -- summary: counts the distinct (c_last_name, c_first_name, d_date) combinations that occur in store sales with d_month_seq 1212 through 1223 but in neither catalog sales nor web sales for the same month range (store EXCEPT catalog EXCEPT web) 2 | select count(*) 3 | from ((select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212+11) 8 | except 9 | (select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212+11) 14 | except 15 | (select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and 
web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212+11) 20 | ) cool_cust 21 | 22 | 23 | -- end query 1 in stream 0 using template query87.tpl 24 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q92.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query92.tpl and seed 2031708268 -- summary: sum of ws_ext_discount_amt over web sales of items with i_manufact_id 269 sold in the 90 days starting 1998-03-18, keeping only sales whose discount exceeds 1.3 times the average discount of the same item over that same window (correlated subquery on i_item_sk) 2 | select 3 | sum(ws_ext_discount_amt) as "Excess Discount Amount" 4 | from 5 | web_sales 6 | ,item 7 | ,date_dim 8 | where 9 | i_manufact_id = 269 10 | and i_item_sk = ws_item_sk 11 | and d_date between timestamp '1998-03-18' and 12 | (cast('1998-03-18' as date) + interval '90' day) 13 | and d_date_sk = ws_sold_date_sk 14 | and ws_ext_discount_amt 15 | > ( 16 | SELECT 17 | 1.3 * avg(ws_ext_discount_amt) 18 | FROM 19 | web_sales 20 | ,date_dim 21 | WHERE 22 | ws_item_sk = i_item_sk 23 | and d_date between timestamp '1998-03-18' and 24 | (cast('1998-03-18' as date) + interval '90' day) 25 | and d_date_sk = ws_sold_date_sk 26 | ) 27 | order by sum(ws_ext_discount_amt) 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query92.tpl 31 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q93.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query93.tpl and seed 1200409435 -- summary: per ss_customer_sk, sums act_sales, computed as (ss_quantity - sr_return_quantity) * ss_sales_price when a return quantity exists and ss_quantity * ss_sales_price otherwise, over store sales joined to store returns whose reason description is 'Did not like the warranty'; first 100 rows by sumsales, ss_customer_sk 2 | select ss_customer_sk 3 | ,sum(act_sales) sumsales 4 | from (select ss_item_sk 5 | ,ss_ticket_number 6 | ,ss_customer_sk 7 | ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price 8 | else (ss_quantity*ss_sales_price) end act_sales 9 | from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk 10 | and sr_ticket_number = ss_ticket_number) 11 | ,reason 12 | where sr_reason_sk = r_reason_sk 13 | and 
r_reason_desc = 'Did not like the warranty') t 14 | group by ss_customer_sk 15 | order by sumsales, ss_customer_sk 16 | limit 100 17 | 18 | -- end query 1 in stream 0 using template query93.tpl 19 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q94.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 -- summary: distinct order count, total ws_ext_ship_cost and total ws_net_profit for web sales with a ship date in the 60 days starting 1999-5-01, a ship address in state 'TX' and a web site whose web_company_name is 'pri', restricted to orders that have a line from a different warehouse and no row in web_returns 2 | select 3 | count(distinct ws_order_number) as "order count" 4 | ,sum(ws_ext_ship_cost) as "total shipping cost" 5 | ,sum(ws_net_profit) as "total net profit" 6 | from 7 | web_sales ws1 8 | ,date_dim 9 | ,customer_address 10 | ,web_site 11 | where 12 | d_date between timestamp '1999-5-01' and 13 | (cast('1999-5-01' as date) + interval '60' day) 14 | and ws1.ws_ship_date_sk = d_date_sk 15 | and ws1.ws_ship_addr_sk = ca_address_sk 16 | and ca_state = 'TX' 17 | and ws1.ws_web_site_sk = web_site_sk 18 | and web_company_name = 'pri' 19 | and exists (select * 20 | from web_sales ws2 21 | where ws1.ws_order_number = ws2.ws_order_number 22 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 23 | and not exists(select * 24 | from web_returns wr1 25 | where ws1.ws_order_number = wr1.wr_order_number) 26 | order by count(distinct ws_order_number) 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query94.tpl 30 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q96.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query96.tpl and seed 1819994127 -- summary: counts store sales sold at t_hour 8 with t_minute >= 30, by households with hd_dep_count 5, at stores whose s_store_name is 'ese' 2 | select count(*) 3 | from store_sales 4 | ,household_demographics 5 | ,time_dim, store 6 | where ss_sold_time_sk = time_dim.t_time_sk 7 | and ss_hdemo_sk = household_demographics.hd_demo_sk 8 | and ss_store_sk = s_store_sk 9 | and time_dim.t_hour = 8 10 | and time_dim.t_minute >= 30 
11 | and household_demographics.hd_dep_count = 5 12 | and store.s_store_name = 'ese' 13 | order by count(*) 14 | limit 100 15 | 16 | -- end query 1 in stream 0 using template query96.tpl 17 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q97.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query97.tpl and seed 1819994127 -- summary: builds the distinct (customer_sk, item_sk) pairs from store sales (ssci) and from catalog sales (csci) for d_month_seq 1212 through 1223, full-outer-joins them on both keys, and counts the pairs found only in store sales, only in catalog sales, and in both 2 | with ssci as ( 3 | select ss_customer_sk customer_sk 4 | ,ss_item_sk item_sk 5 | from store_sales,date_dim 6 | where ss_sold_date_sk = d_date_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | group by ss_customer_sk 9 | ,ss_item_sk), 10 | csci as( 11 | select cs_bill_customer_sk customer_sk 12 | ,cs_item_sk item_sk 13 | from catalog_sales,date_dim 14 | where cs_sold_date_sk = d_date_sk 15 | and d_month_seq between 1212 and 1212 + 11 16 | group by cs_bill_customer_sk 17 | ,cs_item_sk) 18 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 19 | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 20 | ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 21 | from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk 22 | and ssci.item_sk = csci.item_sk) 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query97.tpl 26 | -------------------------------------------------------------------------------- /trino-tpcds/src/main/resources/queries/q98.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query98.tpl and seed 345591136 -- summary: per item, itemrevenue is the sum of ss_ext_sales_price and revenueratio is that sum as a percentage of the total for the item's i_class, for items in categories 'Jewelry', 'Sports', 'Books' sold in the 30 days starting 2001-01-12; this query has no limit clause 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(ss_ext_sales_price) as itemrevenue 7 | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over 8 | 
(partition by i_class) as revenueratio 9 | from 10 | store_sales 11 | ,item 12 | ,date_dim 13 | where 14 | ss_item_sk = i_item_sk 15 | and i_category in ('Jewelry', 'Sports', 'Books') 16 | and ss_sold_date_sk = d_date_sk 17 | and d_date between cast('2001-01-12' as date) 18 | and (cast('2001-01-12' as date) + interval '30' day) 19 | group by 20 | i_item_id 21 | ,i_item_desc 22 | ,i_category 23 | ,i_class 24 | ,i_current_price 25 | order by 26 | i_category 27 | ,i_class 28 | ,i_item_id 29 | ,i_item_desc 30 | ,revenueratio 31 | 32 | -- end query 1 in stream 0 using template query98.tpl 33 | --------------------------------------------------------------------------------