├── .github ├── dependabot.yaml ├── scripts │ └── spark │ │ ├── setup.sh │ │ ├── start.sh │ │ └── start_thrift_server.sh └── workflows │ ├── maven.yaml │ └── webapp-deploy.yaml ├── .gitignore ├── .gitmodules ├── .license-header.tmpl ├── .mvn └── wrapper │ └── maven-wrapper.properties ├── CITATION.bib ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── adapters └── cab-converter │ ├── README.md │ ├── cab-converter.ps1 │ ├── cab-converter.sh │ ├── pom.xml │ ├── sql │ └── spark-3.3.1 │ │ ├── build │ │ └── build.sql │ │ ├── run │ │ ├── query_1.sql │ │ ├── query_10.sql │ │ ├── query_11.sql │ │ ├── query_12.sql │ │ ├── query_13.sql │ │ ├── query_14.sql │ │ ├── query_15.sql │ │ ├── query_16.sql │ │ ├── query_17.sql │ │ ├── query_18.sql │ │ ├── query_19.sql │ │ ├── query_2.sql │ │ ├── query_20.sql │ │ ├── query_21.sql │ │ ├── query_22.sql │ │ ├── query_23.sql │ │ ├── query_3.sql │ │ ├── query_4.sql │ │ ├── query_5.sql │ │ ├── query_6.sql │ │ ├── query_7.sql │ │ ├── query_8.sql │ │ └── query_9.sql │ │ └── setup │ │ └── setup.sql │ └── src │ ├── main │ └── java │ │ └── com │ │ └── microsoft │ │ └── lst_bench │ │ └── cab_converter │ │ ├── ConnectionGenMode.java │ │ ├── Converter.java │ │ └── Driver.java │ └── test │ ├── java │ └── com │ │ └── microsoft │ │ └── lst_bench │ │ └── cab_converter │ │ └── DriverConverterTest.java │ └── resources │ └── small_1tb_10cpu │ ├── query_stream_0.json │ ├── query_stream_1.json │ ├── query_stream_10.json │ ├── query_stream_11.json │ ├── query_stream_12.json │ ├── query_stream_13.json │ ├── query_stream_14.json │ ├── query_stream_15.json │ ├── query_stream_16.json │ ├── query_stream_17.json │ ├── query_stream_18.json │ ├── query_stream_19.json │ ├── query_stream_2.json │ ├── query_stream_3.json │ ├── query_stream_4.json │ ├── query_stream_5.json │ ├── query_stream_6.json │ ├── query_stream_7.json │ ├── query_stream_8.json │ └── query_stream_9.json ├── core ├── metrics │ ├── app │ │ ├── README.md │ │ ├── main.py │ │ ├── requirements.txt │ │ └── utils.py │ ├── notebooks │ │ ├── README.md │ │ ├── clusterPlots.ipynb │ │ ├── execTimePlots.ipynb │ │ ├── listExperiments.ipynb │ │ ├── requirements.txt │ │ └── storagePlots.ipynb │ └── utils │ │ ├── azure │ │ └── azure_utils.py │ │ ├── clusterMetrics.py │ │ ├── constant.py │ │ ├── functions.py │ │ └── storageMetrics.py ├── pom.xml ├── run │ ├── README.md │ ├── auxiliary │ │ ├── tpcds │ │ │ ├── data_maintenance │ │ │ │ └── parameter_values.dat │ │ │ ├── setup_data_maintenance │ │ │ │ └── parameter_values.dat │ │ │ └── single_user │ │ │ │ └── permutation_orders │ │ │ │ ├── 000000.dat │ │ │ │ ├── 000001.dat │ │ │ │ ├── 000002.dat │ │ │ │ ├── 000003.dat │ │ │ │ ├── 000004.dat │ │ │ │ ├── 000005.dat │ │ │ │ ├── 000006.dat │ │ │ │ ├── 000007.dat │ │ │ │ ├── 000008.dat │ │ │ │ ├── 000009.dat │ │ │ │ ├── 000010.dat │ │ │ │ ├── 000011.dat │ │ │ │ ├── 000012.dat │ │ │ │ ├── 000013.dat │ │ │ │ ├── 000014.dat │ │ │ │ ├── 000015.dat │ │ │ │ ├── 000016.dat │ │ │ │ ├── 000017.dat │ │ │ │ ├── 000018.dat │ │ │ │ ├── 000019.dat │ │ │ │ ├── 000020.dat │ │ │ │ ├── 000021.dat │ │ │ │ ├── 000022.dat │ │ │ │ ├── 000023.dat │ │ │ │ ├── 000024.dat │ │ │ │ ├── 000025.dat │ │ │ │ ├── 000026.dat │ │ │ │ ├── 000027.dat │ │ │ │ ├── 000028.dat │ │ │ │ ├── 000029.dat │ │ │ │ ├── 000030.dat │ │ │ │ ├── 000031.dat │ │ │ │ ├── 000032.dat │ │ │ │ ├── 000033.dat │ │ │ │ ├── 000034.dat │ │ │ │ ├── 000035.dat │ │ │ │ ├── 000036.dat │ │ │ │ ├── 000037.dat │ │ │ │ ├── 000038.dat │ │ │ │ ├── 000039.dat │ │ │ │ └── 000040.dat │ │ └── tpch │ │ │ ├── 
data_maintenance │ │ │ └── parameter_values.dat │ │ │ └── setup_data_maintenance │ │ │ └── parameter_values.dat │ ├── snowflake-8.13.1 │ │ ├── config │ │ │ ├── samples │ │ │ │ ├── sample_connections_config.yaml │ │ │ │ ├── sample_experiment_config.yaml │ │ │ │ └── sample_telemetry_config.yaml │ │ │ └── tpcds │ │ │ │ ├── library.yaml │ │ │ │ ├── setup_experiment.yaml │ │ │ │ ├── w0_tpcds-iceberg.yaml │ │ │ │ ├── w0_tpcds-native.yaml │ │ │ │ ├── wp1_longevity-iceberg.yaml │ │ │ │ ├── wp1_longevity-native.yaml │ │ │ │ ├── wp2_resilience-iceberg.yaml │ │ │ │ ├── wp2_resilience-native.yaml │ │ │ │ ├── wp3_rw_concurrency-iceberg.yaml │ │ │ │ ├── wp3_rw_concurrency-native.yaml │ │ │ │ ├── wp3_rw_concurrency_multi-iceberg.yaml │ │ │ │ ├── wp3_rw_concurrency_multi-native.yaml │ │ │ │ ├── wp4_time_travel-iceberg.yaml │ │ │ │ └── wp4_time_travel-native.yaml │ │ └── scripts │ │ │ └── tpcds │ │ │ ├── build │ │ │ ├── 2_load_call_center.sql │ │ │ ├── 2_load_catalog_page.sql │ │ │ ├── 2_load_catalog_returns.sql │ │ │ ├── 2_load_catalog_sales.sql │ │ │ ├── 2_load_customer.sql │ │ │ ├── 2_load_customer_address.sql │ │ │ ├── 2_load_customer_demographics.sql │ │ │ ├── 2_load_date_dim.sql │ │ │ ├── 2_load_household_demographics.sql │ │ │ ├── 2_load_income_band.sql │ │ │ ├── 2_load_inventory.sql │ │ │ ├── 2_load_item.sql │ │ │ ├── 2_load_promotion.sql │ │ │ ├── 2_load_reason.sql │ │ │ ├── 2_load_ship_mode.sql │ │ │ ├── 2_load_store.sql │ │ │ ├── 2_load_store_returns.sql │ │ │ ├── 2_load_store_sales.sql │ │ │ ├── 2_load_time_dim.sql │ │ │ ├── 2_load_warehouse.sql │ │ │ ├── 2_load_web_page.sql │ │ │ ├── 2_load_web_returns.sql │ │ │ ├── 2_load_web_sales.sql │ │ │ ├── 2_load_web_site.sql │ │ │ ├── iceberg_tables │ │ │ │ ├── 1_create_call_center.sql │ │ │ │ ├── 1_create_catalog_page.sql │ │ │ │ ├── 1_create_catalog_returns.sql │ │ │ │ ├── 1_create_catalog_sales.sql │ │ │ │ ├── 1_create_customer.sql │ │ │ │ ├── 1_create_customer_address.sql │ │ │ │ ├── 1_create_customer_demographics.sql │ │ │ │ ├── 1_create_date_dim.sql │ │ │ │ ├── 1_create_household_demographics.sql │ │ │ │ ├── 1_create_income_band.sql │ │ │ │ ├── 1_create_inventory.sql │ │ │ │ ├── 1_create_item.sql │ │ │ │ ├── 1_create_promotion.sql │ │ │ │ ├── 1_create_reason.sql │ │ │ │ ├── 1_create_ship_mode.sql │ │ │ │ ├── 1_create_store.sql │ │ │ │ ├── 1_create_store_returns.sql │ │ │ │ ├── 1_create_store_sales.sql │ │ │ │ ├── 1_create_time_dim.sql │ │ │ │ ├── 1_create_warehouse.sql │ │ │ │ ├── 1_create_web_page.sql │ │ │ │ ├── 1_create_web_returns.sql │ │ │ │ ├── 1_create_web_sales.sql │ │ │ │ └── 1_create_web_site.sql │ │ │ └── native_tables │ │ │ │ ├── 1_create_call_center.sql │ │ │ │ ├── 1_create_catalog_page.sql │ │ │ │ ├── 1_create_catalog_returns.sql │ │ │ │ ├── 1_create_catalog_sales.sql │ │ │ │ ├── 1_create_customer.sql │ │ │ │ ├── 1_create_customer_address.sql │ │ │ │ ├── 1_create_customer_demographics.sql │ │ │ │ ├── 1_create_date_dim.sql │ │ │ │ ├── 1_create_household_demographics.sql │ │ │ │ ├── 1_create_income_band.sql │ │ │ │ ├── 1_create_inventory.sql │ │ │ │ ├── 1_create_item.sql │ │ │ │ ├── 1_create_promotion.sql │ │ │ │ ├── 1_create_reason.sql │ │ │ │ ├── 1_create_ship_mode.sql │ │ │ │ ├── 1_create_store.sql │ │ │ │ ├── 1_create_store_returns.sql │ │ │ │ ├── 1_create_store_sales.sql │ │ │ │ ├── 1_create_time_dim.sql │ │ │ │ ├── 1_create_warehouse.sql │ │ │ │ ├── 1_create_web_page.sql │ │ │ │ ├── 1_create_web_returns.sql │ │ │ │ ├── 1_create_web_sales.sql │ │ │ │ └── 1_create_web_site.sql │ │ │ ├── data_maintenance │ │ │ ├── DF_CS.sql │ │ │ 
├── DF_I.sql │ │ │ ├── DF_SS.sql │ │ │ ├── DF_WS.sql │ │ │ ├── LF_CR.sql │ │ │ ├── LF_CS.sql │ │ │ ├── LF_I.sql │ │ │ ├── LF_SR.sql │ │ │ ├── LF_SS.sql │ │ │ ├── LF_WR.sql │ │ │ └── LF_WS.sql │ │ │ ├── init │ │ │ └── init.sql │ │ │ ├── setup │ │ │ ├── ddl-external-tables.sql │ │ │ └── populate-external-tables.sql │ │ │ ├── setup_data_maintenance │ │ │ ├── ddl-external-tables-refresh.sql │ │ │ └── populate-external-tables-refresh.sql │ │ │ └── single_user │ │ │ ├── query1.sql │ │ │ ├── query10.sql │ │ │ ├── query11.sql │ │ │ ├── query12.sql │ │ │ ├── query13.sql │ │ │ ├── query14.sql │ │ │ ├── query15.sql │ │ │ ├── query16.sql │ │ │ ├── query17.sql │ │ │ ├── query18.sql │ │ │ ├── query19.sql │ │ │ ├── query2.sql │ │ │ ├── query20.sql │ │ │ ├── query21.sql │ │ │ ├── query22.sql │ │ │ ├── query23.sql │ │ │ ├── query24.sql │ │ │ ├── query25.sql │ │ │ ├── query26.sql │ │ │ ├── query27.sql │ │ │ ├── query28.sql │ │ │ ├── query29.sql │ │ │ ├── query3.sql │ │ │ ├── query30.sql │ │ │ ├── query31.sql │ │ │ ├── query32.sql │ │ │ ├── query33.sql │ │ │ ├── query34.sql │ │ │ ├── query35.sql │ │ │ ├── query36.sql │ │ │ ├── query37.sql │ │ │ ├── query38.sql │ │ │ ├── query39.sql │ │ │ ├── query4.sql │ │ │ ├── query40.sql │ │ │ ├── query41.sql │ │ │ ├── query42.sql │ │ │ ├── query43.sql │ │ │ ├── query44.sql │ │ │ ├── query45.sql │ │ │ ├── query46.sql │ │ │ ├── query47.sql │ │ │ ├── query48.sql │ │ │ ├── query49.sql │ │ │ ├── query5.sql │ │ │ ├── query50.sql │ │ │ ├── query51.sql │ │ │ ├── query52.sql │ │ │ ├── query53.sql │ │ │ ├── query54.sql │ │ │ ├── query55.sql │ │ │ ├── query56.sql │ │ │ ├── query57.sql │ │ │ ├── query58.sql │ │ │ ├── query59.sql │ │ │ ├── query6.sql │ │ │ ├── query60.sql │ │ │ ├── query61.sql │ │ │ ├── query62.sql │ │ │ ├── query63.sql │ │ │ ├── query64.sql │ │ │ ├── query65.sql │ │ │ ├── query66.sql │ │ │ ├── query67.sql │ │ │ ├── query68.sql │ │ │ ├── query69.sql │ │ │ ├── query7.sql │ │ │ ├── query70.sql │ │ │ ├── query71.sql │ │ │ ├── query72.sql │ │ │ ├── query73.sql │ │ │ ├── query74.sql │ │ │ ├── query75.sql │ │ │ ├── query76.sql │ │ │ ├── query77.sql │ │ │ ├── query78.sql │ │ │ ├── query79.sql │ │ │ ├── query8.sql │ │ │ ├── query80.sql │ │ │ ├── query81.sql │ │ │ ├── query82.sql │ │ │ ├── query83.sql │ │ │ ├── query84.sql │ │ │ ├── query85.sql │ │ │ ├── query86.sql │ │ │ ├── query87.sql │ │ │ ├── query88.sql │ │ │ ├── query89.sql │ │ │ ├── query9.sql │ │ │ ├── query90.sql │ │ │ ├── query91.sql │ │ │ ├── query92.sql │ │ │ ├── query93.sql │ │ │ ├── query94.sql │ │ │ ├── query95.sql │ │ │ ├── query96.sql │ │ │ ├── query97.sql │ │ │ ├── query98.sql │ │ │ └── query99.sql │ ├── spark-3.3.1 │ │ ├── azure-pipelines │ │ │ ├── README.md │ │ │ ├── config │ │ │ │ ├── connections_config.yaml │ │ │ │ ├── experiment_config-cow-delta-2.2.0.yaml │ │ │ │ ├── experiment_config-cow-hudi-0.12.2.yaml │ │ │ │ ├── experiment_config-cow-iceberg-1.1.0.yaml │ │ │ │ ├── experiment_config-mor-hudi-0.12.2.yaml │ │ │ │ ├── experiment_config-mor-iceberg-1.1.0.yaml │ │ │ │ ├── setup_experiment_config.yaml │ │ │ │ └── telemetry_config.yaml │ │ │ ├── run-lst-bench.yml │ │ │ └── sh │ │ │ │ ├── cleanup-delta-2.2.0.sh │ │ │ │ ├── cleanup-hudi-0.12.2.sh │ │ │ │ ├── cleanup-iceberg-1.1.0.sh │ │ │ │ ├── delta-2.2.0.sh │ │ │ │ ├── dist-exec.sh │ │ │ │ ├── dist-setup.sh │ │ │ │ ├── hive-site.xml.template │ │ │ │ ├── hms.sh │ │ │ │ ├── hudi-0.12.2.sh │ │ │ │ ├── iceberg-1.1.0.sh │ │ │ │ ├── init.sh │ │ │ │ ├── spark-defaults.conf.template │ │ │ │ ├── spark-env.sh.template │ │ │ │ ├── start-cluster.sh │ │ │ │ └── 
stop-cluster.sh │ │ ├── config │ │ │ ├── tpcds │ │ │ │ ├── library.yaml │ │ │ │ ├── setup_experiment.yaml │ │ │ │ ├── w0_tpcds-delta-2.2.0.yaml │ │ │ │ ├── w0_tpcds-hudi-0.12.2.yaml │ │ │ │ ├── w0_tpcds-iceberg-1.1.0.yaml │ │ │ │ ├── wp1_longevity-delta-2.2.0.yaml │ │ │ │ ├── wp1_longevity-hudi-0.12.2.yaml │ │ │ │ ├── wp1_longevity-iceberg-1.1.0.yaml │ │ │ │ ├── wp1_longevity_trickle_1k_batches.yaml │ │ │ │ ├── wp2_resilience-delta-2.2.0.yaml │ │ │ │ ├── wp2_resilience-hudi-0.12.2.yaml │ │ │ │ ├── wp2_resilience-iceberg-1.1.0.yaml │ │ │ │ ├── wp3_rw_concurrency-delta-2.2.0.yaml │ │ │ │ ├── wp3_rw_concurrency-hudi-0.12.2.yaml │ │ │ │ ├── wp3_rw_concurrency-iceberg-1.1.0.yaml │ │ │ │ ├── wp3_rw_concurrency_multi-delta-2.2.0.yaml │ │ │ │ ├── wp4_time_travel-delta-2.2.0.yaml │ │ │ │ ├── wp4_time_travel-hudi-0.12.2.yaml │ │ │ │ └── wp4_time_travel-iceberg-1.1.0.yaml │ │ │ └── tpch │ │ │ │ ├── library.yaml │ │ │ │ ├── w0_tpch-delta.yaml │ │ │ │ ├── w0_tpch-hudi.yaml │ │ │ │ └── w0_tpch-iceberg.yaml │ │ ├── results │ │ │ └── spark-3.3.1-2024-02-01-8xStandard_E8s_v5.duckdb │ │ └── scripts │ │ │ ├── tpcds │ │ │ ├── build │ │ │ │ ├── 1_create_call_center.sql │ │ │ │ ├── 1_create_catalog_page.sql │ │ │ │ ├── 1_create_catalog_returns.sql │ │ │ │ ├── 1_create_catalog_sales.sql │ │ │ │ ├── 1_create_customer.sql │ │ │ │ ├── 1_create_customer_address.sql │ │ │ │ ├── 1_create_customer_demographics.sql │ │ │ │ ├── 1_create_date_dim.sql │ │ │ │ ├── 1_create_household_demographics.sql │ │ │ │ ├── 1_create_income_band.sql │ │ │ │ ├── 1_create_inventory.sql │ │ │ │ ├── 1_create_item.sql │ │ │ │ ├── 1_create_promotion.sql │ │ │ │ ├── 1_create_reason.sql │ │ │ │ ├── 1_create_ship_mode.sql │ │ │ │ ├── 1_create_store.sql │ │ │ │ ├── 1_create_store_returns.sql │ │ │ │ ├── 1_create_store_sales.sql │ │ │ │ ├── 1_create_time_dim.sql │ │ │ │ ├── 1_create_warehouse.sql │ │ │ │ ├── 1_create_web_page.sql │ │ │ │ ├── 1_create_web_returns.sql │ │ │ │ ├── 1_create_web_sales.sql │ │ │ │ ├── 1_create_web_site.sql │ │ │ │ ├── 2_load_call_center.sql │ │ │ │ ├── 2_load_catalog_page.sql │ │ │ │ ├── 2_load_catalog_returns.sql │ │ │ │ ├── 2_load_catalog_sales.sql │ │ │ │ ├── 2_load_customer.sql │ │ │ │ ├── 2_load_customer_address.sql │ │ │ │ ├── 2_load_customer_demographics.sql │ │ │ │ ├── 2_load_date_dim.sql │ │ │ │ ├── 2_load_household_demographics.sql │ │ │ │ ├── 2_load_income_band.sql │ │ │ │ ├── 2_load_inventory.sql │ │ │ │ ├── 2_load_item.sql │ │ │ │ ├── 2_load_promotion.sql │ │ │ │ ├── 2_load_reason.sql │ │ │ │ ├── 2_load_ship_mode.sql │ │ │ │ ├── 2_load_store.sql │ │ │ │ ├── 2_load_store_returns.sql │ │ │ │ ├── 2_load_store_sales.sql │ │ │ │ ├── 2_load_time_dim.sql │ │ │ │ ├── 2_load_warehouse.sql │ │ │ │ ├── 2_load_web_page.sql │ │ │ │ ├── 2_load_web_returns.sql │ │ │ │ ├── 2_load_web_sales.sql │ │ │ │ ├── 2_load_web_site.sql │ │ │ │ ├── 3_analyze_call_center.sql │ │ │ │ ├── 3_analyze_catalog_page.sql │ │ │ │ ├── 3_analyze_catalog_returns.sql │ │ │ │ ├── 3_analyze_catalog_sales.sql │ │ │ │ ├── 3_analyze_customer.sql │ │ │ │ ├── 3_analyze_customer_address.sql │ │ │ │ ├── 3_analyze_customer_demographics.sql │ │ │ │ ├── 3_analyze_date_dim.sql │ │ │ │ ├── 3_analyze_household_demographics.sql │ │ │ │ ├── 3_analyze_income_band.sql │ │ │ │ ├── 3_analyze_inventory.sql │ │ │ │ ├── 3_analyze_item.sql │ │ │ │ ├── 3_analyze_promotion.sql │ │ │ │ ├── 3_analyze_reason.sql │ │ │ │ ├── 3_analyze_ship_mode.sql │ │ │ │ ├── 3_analyze_store.sql │ │ │ │ ├── 3_analyze_store_returns.sql │ │ │ │ ├── 3_analyze_store_sales.sql │ │ │ │ ├── 
3_analyze_time_dim.sql │ │ │ │ ├── 3_analyze_warehouse.sql │ │ │ │ ├── 3_analyze_web_page.sql │ │ │ │ ├── 3_analyze_web_returns.sql │ │ │ │ ├── 3_analyze_web_sales.sql │ │ │ │ └── 3_analyze_web_site.sql │ │ │ ├── data_maintenance │ │ │ │ ├── DF_CS-merge.sql │ │ │ │ ├── DF_CS-mixed.sql │ │ │ │ ├── DF_CS.sql │ │ │ │ ├── DF_I-merge.sql │ │ │ │ ├── DF_I-mixed.sql │ │ │ │ ├── DF_I.sql │ │ │ │ ├── DF_SS-merge.sql │ │ │ │ ├── DF_SS-mixed.sql │ │ │ │ ├── DF_SS.sql │ │ │ │ ├── DF_WS-merge.sql │ │ │ │ ├── DF_WS-mixed.sql │ │ │ │ ├── DF_WS.sql │ │ │ │ ├── LF_CR.sql │ │ │ │ ├── LF_CS.sql │ │ │ │ ├── LF_I.sql │ │ │ │ ├── LF_SR.sql │ │ │ │ ├── LF_SS.sql │ │ │ │ ├── LF_WR.sql │ │ │ │ └── LF_WS.sql │ │ │ ├── data_maintenance_dependent │ │ │ │ ├── DF_CR_1.sql │ │ │ │ ├── DF_CR_2.sql │ │ │ │ ├── DF_CR_3.sql │ │ │ │ ├── DF_CR_delete.sql │ │ │ │ ├── DF_CS_1.sql │ │ │ │ ├── DF_CS_2.sql │ │ │ │ ├── DF_CS_3.sql │ │ │ │ ├── DF_CS_delete.sql │ │ │ │ ├── DF_I_1.sql │ │ │ │ ├── DF_I_2.sql │ │ │ │ ├── DF_I_3.sql │ │ │ │ ├── DF_I_delete.sql │ │ │ │ ├── DF_SR_1.sql │ │ │ │ ├── DF_SR_2.sql │ │ │ │ ├── DF_SR_3.sql │ │ │ │ ├── DF_SR_delete.sql │ │ │ │ ├── DF_SS_1.sql │ │ │ │ ├── DF_SS_2.sql │ │ │ │ ├── DF_SS_3.sql │ │ │ │ ├── DF_SS_delete.sql │ │ │ │ ├── DF_WR_1.sql │ │ │ │ ├── DF_WR_2.sql │ │ │ │ ├── DF_WR_3.sql │ │ │ │ ├── DF_WR_delete.sql │ │ │ │ ├── DF_WS_1.sql │ │ │ │ ├── DF_WS_2.sql │ │ │ │ ├── DF_WS_3.sql │ │ │ │ ├── DF_WS_delete.sql │ │ │ │ ├── LF_CR_1.sql │ │ │ │ ├── LF_CR_2.sql │ │ │ │ ├── LF_CR_3.sql │ │ │ │ ├── LF_CR_insert.sql │ │ │ │ ├── LF_CS_1.sql │ │ │ │ ├── LF_CS_2.sql │ │ │ │ ├── LF_CS_3.sql │ │ │ │ ├── LF_CS_insert.sql │ │ │ │ ├── LF_I_1.sql │ │ │ │ ├── LF_I_2.sql │ │ │ │ ├── LF_I_3.sql │ │ │ │ ├── LF_I_insert.sql │ │ │ │ ├── LF_SR_1.sql │ │ │ │ ├── LF_SR_2.sql │ │ │ │ ├── LF_SR_3.sql │ │ │ │ ├── LF_SR_insert.sql │ │ │ │ ├── LF_SS_1.sql │ │ │ │ ├── LF_SS_2.sql │ │ │ │ ├── LF_SS_3.sql │ │ │ │ ├── LF_SS_insert.sql │ │ │ │ ├── LF_WR_1.sql │ │ │ │ ├── LF_WR_2.sql │ │ │ │ ├── LF_WR_3.sql │ │ │ │ ├── LF_WR_insert.sql │ │ │ │ ├── LF_WS_1.sql │ │ │ │ ├── LF_WS_2.sql │ │ │ │ ├── LF_WS_3.sql │ │ │ │ └── LF_WS_insert.sql │ │ │ ├── init │ │ │ │ └── init.sql │ │ │ ├── optimize │ │ │ │ ├── o_call_center-delta.sql │ │ │ │ ├── o_call_center-hudi.sql │ │ │ │ ├── o_call_center-iceberg.sql │ │ │ │ ├── o_catalog_page-delta.sql │ │ │ │ ├── o_catalog_page-hudi.sql │ │ │ │ ├── o_catalog_page-iceberg.sql │ │ │ │ ├── o_catalog_returns-delta.sql │ │ │ │ ├── o_catalog_returns-hudi.sql │ │ │ │ ├── o_catalog_returns-iceberg.sql │ │ │ │ ├── o_catalog_sales-delta.sql │ │ │ │ ├── o_catalog_sales-hudi.sql │ │ │ │ ├── o_catalog_sales-iceberg.sql │ │ │ │ ├── o_customer-delta.sql │ │ │ │ ├── o_customer-hudi.sql │ │ │ │ ├── o_customer-iceberg.sql │ │ │ │ ├── o_customer_address-delta.sql │ │ │ │ ├── o_customer_address-hudi.sql │ │ │ │ ├── o_customer_address-iceberg.sql │ │ │ │ ├── o_customer_demographics-delta.sql │ │ │ │ ├── o_customer_demographics-hudi.sql │ │ │ │ ├── o_customer_demographics-iceberg.sql │ │ │ │ ├── o_date_dim-delta.sql │ │ │ │ ├── o_date_dim-hudi.sql │ │ │ │ ├── o_date_dim-iceberg.sql │ │ │ │ ├── o_household_demographics-delta.sql │ │ │ │ ├── o_household_demographics-hudi.sql │ │ │ │ ├── o_household_demographics-iceberg.sql │ │ │ │ ├── o_income_band-delta.sql │ │ │ │ ├── o_income_band-hudi.sql │ │ │ │ ├── o_income_band-iceberg.sql │ │ │ │ ├── o_inventory-delta.sql │ │ │ │ ├── o_inventory-hudi.sql │ │ │ │ ├── o_inventory-iceberg.sql │ │ │ │ ├── o_item-delta.sql │ │ │ │ ├── o_item-hudi.sql │ │ │ │ ├── o_item-iceberg.sql │ 
│ │ │ ├── o_promotion-delta.sql │ │ │ │ ├── o_promotion-hudi.sql │ │ │ │ ├── o_promotion-iceberg.sql │ │ │ │ ├── o_reason-delta.sql │ │ │ │ ├── o_reason-hudi.sql │ │ │ │ ├── o_reason-iceberg.sql │ │ │ │ ├── o_ship_mode-delta.sql │ │ │ │ ├── o_ship_mode-hudi.sql │ │ │ │ ├── o_ship_mode-iceberg.sql │ │ │ │ ├── o_store-delta.sql │ │ │ │ ├── o_store-hudi.sql │ │ │ │ ├── o_store-iceberg.sql │ │ │ │ ├── o_store_returns-delta.sql │ │ │ │ ├── o_store_returns-hudi.sql │ │ │ │ ├── o_store_returns-iceberg.sql │ │ │ │ ├── o_store_sales-delta.sql │ │ │ │ ├── o_store_sales-hudi.sql │ │ │ │ ├── o_store_sales-iceberg.sql │ │ │ │ ├── o_time_dim-delta.sql │ │ │ │ ├── o_time_dim-hudi.sql │ │ │ │ ├── o_time_dim-iceberg.sql │ │ │ │ ├── o_warehouse-delta.sql │ │ │ │ ├── o_warehouse-hudi.sql │ │ │ │ ├── o_warehouse-iceberg.sql │ │ │ │ ├── o_web_page-delta.sql │ │ │ │ ├── o_web_page-hudi.sql │ │ │ │ ├── o_web_page-iceberg.sql │ │ │ │ ├── o_web_returns-delta.sql │ │ │ │ ├── o_web_returns-hudi.sql │ │ │ │ ├── o_web_returns-iceberg.sql │ │ │ │ ├── o_web_sales-delta.sql │ │ │ │ ├── o_web_sales-hudi.sql │ │ │ │ ├── o_web_sales-iceberg.sql │ │ │ │ ├── o_web_site-delta.sql │ │ │ │ ├── o_web_site-hudi.sql │ │ │ │ └── o_web_site-iceberg.sql │ │ │ ├── optimize_split │ │ │ │ ├── o_catalog_returns_IN-delta.sql │ │ │ │ ├── o_catalog_returns_IN-hudi.sql │ │ │ │ ├── o_catalog_returns_IN-iceberg.sql │ │ │ │ ├── o_catalog_returns_NULL-delta.sql │ │ │ │ ├── o_catalog_returns_NULL-hudi.sql │ │ │ │ ├── o_catalog_returns_NULL-iceberg.sql │ │ │ │ ├── o_catalog_returns_SELECT.sql │ │ │ │ ├── o_catalog_sales_IN-delta.sql │ │ │ │ ├── o_catalog_sales_IN-hudi.sql │ │ │ │ ├── o_catalog_sales_IN-iceberg.sql │ │ │ │ ├── o_catalog_sales_NULL-delta.sql │ │ │ │ ├── o_catalog_sales_NULL-hudi.sql │ │ │ │ ├── o_catalog_sales_NULL-iceberg.sql │ │ │ │ ├── o_catalog_sales_SELECT.sql │ │ │ │ ├── o_inventory_IN-delta.sql │ │ │ │ ├── o_inventory_IN-hudi.sql │ │ │ │ ├── o_inventory_IN-iceberg.sql │ │ │ │ ├── o_inventory_NULL-delta.sql │ │ │ │ ├── o_inventory_NULL-hudi.sql │ │ │ │ ├── o_inventory_NULL-iceberg.sql │ │ │ │ ├── o_inventory_SELECT.sql │ │ │ │ ├── o_store_returns_IN-delta.sql │ │ │ │ ├── o_store_returns_IN-hudi.sql │ │ │ │ ├── o_store_returns_IN-iceberg.sql │ │ │ │ ├── o_store_returns_NULL-delta.sql │ │ │ │ ├── o_store_returns_NULL-hudi.sql │ │ │ │ ├── o_store_returns_NULL-iceberg.sql │ │ │ │ ├── o_store_returns_SELECT.sql │ │ │ │ ├── o_store_sales_IN-delta.sql │ │ │ │ ├── o_store_sales_IN-hudi.sql │ │ │ │ ├── o_store_sales_IN-iceberg.sql │ │ │ │ ├── o_store_sales_NULL-delta.sql │ │ │ │ ├── o_store_sales_NULL-hudi.sql │ │ │ │ ├── o_store_sales_NULL-iceberg.sql │ │ │ │ ├── o_store_sales_SELECT.sql │ │ │ │ ├── o_web_returns_IN-delta.sql │ │ │ │ ├── o_web_returns_IN-hudi.sql │ │ │ │ ├── o_web_returns_IN-iceberg.sql │ │ │ │ ├── o_web_returns_NULL-delta.sql │ │ │ │ ├── o_web_returns_NULL-hudi.sql │ │ │ │ ├── o_web_returns_NULL-iceberg.sql │ │ │ │ ├── o_web_returns_SELECT.sql │ │ │ │ ├── o_web_sales_IN-delta.sql │ │ │ │ ├── o_web_sales_IN-hudi.sql │ │ │ │ ├── o_web_sales_IN-iceberg.sql │ │ │ │ ├── o_web_sales_NULL-delta.sql │ │ │ │ ├── o_web_sales_NULL-hudi.sql │ │ │ │ ├── o_web_sales_NULL-iceberg.sql │ │ │ │ └── o_web_sales_SELECT.sql │ │ │ ├── setup │ │ │ │ └── ddl-external-tables.sql │ │ │ ├── setup_data_maintenance │ │ │ │ └── ddl-external-tables-refresh.sql │ │ │ └── single_user │ │ │ │ ├── query1.sql │ │ │ │ ├── query10.sql │ │ │ │ ├── query11.sql │ │ │ │ ├── query12.sql │ │ │ │ ├── query13.sql │ │ │ │ ├── query14.sql │ │ │ │ ├── query15.sql │ 
│ │ │ ├── query16.sql │ │ │ │ ├── query17.sql │ │ │ │ ├── query18.sql │ │ │ │ ├── query19.sql │ │ │ │ ├── query2.sql │ │ │ │ ├── query20.sql │ │ │ │ ├── query21.sql │ │ │ │ ├── query22.sql │ │ │ │ ├── query23.sql │ │ │ │ ├── query24.sql │ │ │ │ ├── query25.sql │ │ │ │ ├── query26.sql │ │ │ │ ├── query27.sql │ │ │ │ ├── query28.sql │ │ │ │ ├── query29.sql │ │ │ │ ├── query3.sql │ │ │ │ ├── query30.sql │ │ │ │ ├── query31.sql │ │ │ │ ├── query32.sql │ │ │ │ ├── query33.sql │ │ │ │ ├── query34.sql │ │ │ │ ├── query35.sql │ │ │ │ ├── query36.sql │ │ │ │ ├── query37.sql │ │ │ │ ├── query38.sql │ │ │ │ ├── query39.sql │ │ │ │ ├── query4.sql │ │ │ │ ├── query40.sql │ │ │ │ ├── query41.sql │ │ │ │ ├── query42.sql │ │ │ │ ├── query43.sql │ │ │ │ ├── query44.sql │ │ │ │ ├── query45.sql │ │ │ │ ├── query46.sql │ │ │ │ ├── query47.sql │ │ │ │ ├── query48.sql │ │ │ │ ├── query49.sql │ │ │ │ ├── query5.sql │ │ │ │ ├── query50.sql │ │ │ │ ├── query51.sql │ │ │ │ ├── query52.sql │ │ │ │ ├── query53.sql │ │ │ │ ├── query54.sql │ │ │ │ ├── query55.sql │ │ │ │ ├── query56.sql │ │ │ │ ├── query57.sql │ │ │ │ ├── query58.sql │ │ │ │ ├── query59.sql │ │ │ │ ├── query6.sql │ │ │ │ ├── query60.sql │ │ │ │ ├── query61.sql │ │ │ │ ├── query62.sql │ │ │ │ ├── query63.sql │ │ │ │ ├── query64.sql │ │ │ │ ├── query65.sql │ │ │ │ ├── query66.sql │ │ │ │ ├── query67.sql │ │ │ │ ├── query68.sql │ │ │ │ ├── query69.sql │ │ │ │ ├── query7.sql │ │ │ │ ├── query70.sql │ │ │ │ ├── query71.sql │ │ │ │ ├── query72.sql │ │ │ │ ├── query73.sql │ │ │ │ ├── query74.sql │ │ │ │ ├── query75.sql │ │ │ │ ├── query76.sql │ │ │ │ ├── query77.sql │ │ │ │ ├── query78.sql │ │ │ │ ├── query79.sql │ │ │ │ ├── query8.sql │ │ │ │ ├── query80.sql │ │ │ │ ├── query81.sql │ │ │ │ ├── query82.sql │ │ │ │ ├── query83.sql │ │ │ │ ├── query84.sql │ │ │ │ ├── query85.sql │ │ │ │ ├── query86.sql │ │ │ │ ├── query87.sql │ │ │ │ ├── query88.sql │ │ │ │ ├── query89.sql │ │ │ │ ├── query9.sql │ │ │ │ ├── query90.sql │ │ │ │ ├── query91.sql │ │ │ │ ├── query92.sql │ │ │ │ ├── query93.sql │ │ │ │ ├── query94.sql │ │ │ │ ├── query95.sql │ │ │ │ ├── query96.sql │ │ │ │ ├── query97.sql │ │ │ │ ├── query98.sql │ │ │ │ └── query99.sql │ │ │ └── tpch │ │ │ ├── build │ │ │ ├── 1_create_customer.sql │ │ │ ├── 1_create_lineitem.sql │ │ │ ├── 1_create_nation.sql │ │ │ ├── 1_create_orders.sql │ │ │ ├── 1_create_part.sql │ │ │ ├── 1_create_partsupp.sql │ │ │ ├── 1_create_region.sql │ │ │ ├── 1_create_supplier.sql │ │ │ ├── 2_load_customer.sql │ │ │ ├── 2_load_lineitem.sql │ │ │ ├── 2_load_nation.sql │ │ │ ├── 2_load_orders.sql │ │ │ ├── 2_load_part.sql │ │ │ ├── 2_load_partsupp.sql │ │ │ ├── 2_load_region.sql │ │ │ ├── 2_load_supplier.sql │ │ │ ├── 3_analyze_customer.sql │ │ │ ├── 3_analyze_lineitem.sql │ │ │ ├── 3_analyze_nation.sql │ │ │ ├── 3_analyze_orders.sql │ │ │ ├── 3_analyze_part.sql │ │ │ ├── 3_analyze_partsupp.sql │ │ │ ├── 3_analyze_region.sql │ │ │ └── 3_analyze_supplier.sql │ │ │ ├── data_maintenance │ │ │ ├── RF1.sql │ │ │ ├── RF2-merge.sql │ │ │ └── RF2.sql │ │ │ ├── init │ │ │ └── init.sql │ │ │ ├── setup │ │ │ └── ddl-external-tables.sql │ │ │ ├── setup_data_maintenance │ │ │ └── ddl-external-tables-refresh.sql │ │ │ └── single_user │ │ │ ├── query1.sql │ │ │ ├── query10.sql │ │ │ ├── query11.sql │ │ │ ├── query12.sql │ │ │ ├── query13.sql │ │ │ ├── query14.sql │ │ │ ├── query15.sql │ │ │ ├── query16.sql │ │ │ ├── query17.sql │ │ │ ├── query18.sql │ │ │ ├── query19.sql │ │ │ ├── query2.sql │ │ │ ├── query20.sql │ │ │ ├── query21.sql │ │ │ ├── query22.sql 
│ │ │ ├── query3.sql │ │ │ ├── query4.sql │ │ │ ├── query5.sql │ │ │ ├── query6.sql │ │ │ ├── query7.sql │ │ │ ├── query8.sql │ │ │ └── query9.sql │ └── trino-420 │ │ ├── azure-pipelines │ │ ├── README.md │ │ ├── config │ │ │ ├── connections_config.yaml │ │ │ ├── experiment_config-cow-delta.yaml │ │ │ ├── experiment_config-mor-iceberg.yaml │ │ │ ├── setup_experiment_config.yaml │ │ │ └── telemetry_config.yaml │ │ ├── run-lst-bench.yml │ │ └── sh │ │ │ ├── coordinator-config.properties.template │ │ │ ├── delta.properties.template │ │ │ ├── dist-exec.sh │ │ │ ├── dist-setup.sh │ │ │ ├── hive-site.xml.template │ │ │ ├── hive.properties.template │ │ │ ├── hms.sh │ │ │ ├── iceberg.properties.template │ │ │ ├── init.sh │ │ │ ├── jvm.config.template │ │ │ ├── log.properties.template │ │ │ ├── node.properties.template │ │ │ ├── start-cluster.sh │ │ │ ├── stop-cluster.sh │ │ │ └── worker-config.properties.template │ │ ├── config │ │ └── tpcds │ │ │ ├── library.yaml │ │ │ ├── setup_experiment.yaml │ │ │ ├── w0_tpcds.yaml │ │ │ ├── wp1_longevity.yaml │ │ │ ├── wp2_resilience.yaml │ │ │ └── wp3_rw_concurrency.yaml │ │ ├── results │ │ └── trino-420-2024-02-01-8xStandard_E8s_v5.duckdb │ │ └── scripts │ │ └── tpcds │ │ ├── build │ │ ├── 1_create_call_center.sql │ │ ├── 1_create_catalog_page.sql │ │ ├── 1_create_catalog_returns.sql │ │ ├── 1_create_catalog_sales.sql │ │ ├── 1_create_customer.sql │ │ ├── 1_create_customer_address.sql │ │ ├── 1_create_customer_demographics.sql │ │ ├── 1_create_date_dim.sql │ │ ├── 1_create_household_demographics.sql │ │ ├── 1_create_income_band.sql │ │ ├── 1_create_inventory.sql │ │ ├── 1_create_item.sql │ │ ├── 1_create_promotion.sql │ │ ├── 1_create_reason.sql │ │ ├── 1_create_ship_mode.sql │ │ ├── 1_create_store.sql │ │ ├── 1_create_store_returns.sql │ │ ├── 1_create_store_sales.sql │ │ ├── 1_create_time_dim.sql │ │ ├── 1_create_warehouse.sql │ │ ├── 1_create_web_page.sql │ │ ├── 1_create_web_returns.sql │ │ ├── 1_create_web_sales.sql │ │ ├── 1_create_web_site.sql │ │ ├── 2_load_call_center.sql │ │ ├── 2_load_catalog_page.sql │ │ ├── 2_load_catalog_returns.sql │ │ ├── 2_load_catalog_sales.sql │ │ ├── 2_load_customer.sql │ │ ├── 2_load_customer_address.sql │ │ ├── 2_load_customer_demographics.sql │ │ ├── 2_load_date_dim.sql │ │ ├── 2_load_household_demographics.sql │ │ ├── 2_load_income_band.sql │ │ ├── 2_load_inventory.sql │ │ ├── 2_load_item.sql │ │ ├── 2_load_promotion.sql │ │ ├── 2_load_reason.sql │ │ ├── 2_load_ship_mode.sql │ │ ├── 2_load_store.sql │ │ ├── 2_load_store_returns.sql │ │ ├── 2_load_store_sales.sql │ │ ├── 2_load_time_dim.sql │ │ ├── 2_load_warehouse.sql │ │ ├── 2_load_web_page.sql │ │ ├── 2_load_web_returns.sql │ │ ├── 2_load_web_sales.sql │ │ ├── 2_load_web_site.sql │ │ ├── 3_analyze_call_center.sql │ │ ├── 3_analyze_catalog_page.sql │ │ ├── 3_analyze_catalog_returns.sql │ │ ├── 3_analyze_catalog_sales.sql │ │ ├── 3_analyze_customer.sql │ │ ├── 3_analyze_customer_address.sql │ │ ├── 3_analyze_customer_demographics.sql │ │ ├── 3_analyze_date_dim.sql │ │ ├── 3_analyze_household_demographics.sql │ │ ├── 3_analyze_income_band.sql │ │ ├── 3_analyze_inventory.sql │ │ ├── 3_analyze_item.sql │ │ ├── 3_analyze_promotion.sql │ │ ├── 3_analyze_reason.sql │ │ ├── 3_analyze_ship_mode.sql │ │ ├── 3_analyze_store.sql │ │ ├── 3_analyze_store_returns.sql │ │ ├── 3_analyze_store_sales.sql │ │ ├── 3_analyze_time_dim.sql │ │ ├── 3_analyze_warehouse.sql │ │ ├── 3_analyze_web_page.sql │ │ ├── 3_analyze_web_returns.sql │ │ ├── 3_analyze_web_sales.sql │ │ └── 
3_analyze_web_site.sql │ │ ├── data_maintenance │ │ ├── DF_CS.sql │ │ ├── DF_I.sql │ │ ├── DF_SS.sql │ │ ├── DF_WS.sql │ │ ├── LF_CR.sql │ │ ├── LF_CS.sql │ │ ├── LF_I.sql │ │ ├── LF_SR.sql │ │ ├── LF_SS.sql │ │ ├── LF_WR.sql │ │ └── LF_WS.sql │ │ ├── init │ │ └── init.sql │ │ ├── optimize │ │ ├── o_call_center.sql │ │ ├── o_catalog_page.sql │ │ ├── o_catalog_returns.sql │ │ ├── o_catalog_sales.sql │ │ ├── o_customer.sql │ │ ├── o_customer_address.sql │ │ ├── o_customer_demographics.sql │ │ ├── o_date_dim.sql │ │ ├── o_household_demographics.sql │ │ ├── o_income_band.sql │ │ ├── o_inventory.sql │ │ ├── o_item.sql │ │ ├── o_promotion.sql │ │ ├── o_reason.sql │ │ ├── o_ship_mode.sql │ │ ├── o_store.sql │ │ ├── o_store_returns.sql │ │ ├── o_store_sales.sql │ │ ├── o_time_dim.sql │ │ ├── o_warehouse.sql │ │ ├── o_web_page.sql │ │ ├── o_web_returns.sql │ │ ├── o_web_sales.sql │ │ └── o_web_site.sql │ │ ├── optimize_split │ │ ├── o_catalog_returns_IN.sql │ │ ├── o_catalog_returns_NULL.sql │ │ ├── o_catalog_returns_SELECT.sql │ │ ├── o_catalog_sales_IN.sql │ │ ├── o_catalog_sales_NULL.sql │ │ ├── o_catalog_sales_SELECT.sql │ │ ├── o_inventory_IN.sql │ │ ├── o_inventory_NULL.sql │ │ ├── o_inventory_SELECT.sql │ │ ├── o_store_returns_IN.sql │ │ ├── o_store_returns_NULL.sql │ │ ├── o_store_returns_SELECT.sql │ │ ├── o_store_sales_IN.sql │ │ ├── o_store_sales_NULL.sql │ │ ├── o_store_sales_SELECT.sql │ │ ├── o_web_returns_IN.sql │ │ ├── o_web_returns_NULL.sql │ │ ├── o_web_returns_SELECT.sql │ │ ├── o_web_sales_IN.sql │ │ ├── o_web_sales_NULL.sql │ │ └── o_web_sales_SELECT.sql │ │ ├── setup │ │ └── ddl-external-tables.sql │ │ ├── setup_data_maintenance │ │ └── ddl-external-tables-refresh.sql │ │ └── single_user │ │ ├── query1.sql │ │ ├── query10.sql │ │ ├── query11.sql │ │ ├── query12.sql │ │ ├── query13.sql │ │ ├── query14.sql │ │ ├── query15.sql │ │ ├── query16.sql │ │ ├── query17.sql │ │ ├── query18.sql │ │ ├── query19.sql │ │ ├── query2.sql │ │ ├── query20.sql │ │ ├── query21.sql │ │ ├── query22.sql │ │ ├── query23.sql │ │ ├── query24.sql │ │ ├── query25.sql │ │ ├── query26.sql │ │ ├── query27.sql │ │ ├── query28.sql │ │ ├── query29.sql │ │ ├── query3.sql │ │ ├── query30.sql │ │ ├── query31.sql │ │ ├── query32.sql │ │ ├── query33.sql │ │ ├── query34.sql │ │ ├── query35.sql │ │ ├── query36.sql │ │ ├── query37.sql │ │ ├── query38.sql │ │ ├── query39.sql │ │ ├── query4.sql │ │ ├── query40.sql │ │ ├── query41.sql │ │ ├── query42.sql │ │ ├── query43.sql │ │ ├── query44.sql │ │ ├── query45.sql │ │ ├── query46.sql │ │ ├── query47.sql │ │ ├── query48.sql │ │ ├── query49.sql │ │ ├── query5.sql │ │ ├── query50.sql │ │ ├── query51.sql │ │ ├── query52.sql │ │ ├── query53.sql │ │ ├── query54.sql │ │ ├── query55.sql │ │ ├── query56.sql │ │ ├── query57.sql │ │ ├── query58.sql │ │ ├── query59.sql │ │ ├── query6.sql │ │ ├── query60.sql │ │ ├── query61.sql │ │ ├── query62.sql │ │ ├── query63.sql │ │ ├── query64.sql │ │ ├── query65.sql │ │ ├── query66.sql │ │ ├── query67.sql │ │ ├── query68.sql │ │ ├── query69.sql │ │ ├── query7.sql │ │ ├── query70.sql │ │ ├── query71.sql │ │ ├── query72.sql │ │ ├── query73.sql │ │ ├── query74.sql │ │ ├── query75.sql │ │ ├── query76.sql │ │ ├── query77.sql │ │ ├── query78.sql │ │ ├── query79.sql │ │ ├── query8.sql │ │ ├── query80.sql │ │ ├── query81.sql │ │ ├── query82.sql │ │ ├── query83.sql │ │ ├── query84.sql │ │ ├── query85.sql │ │ ├── query86.sql │ │ ├── query87.sql │ │ ├── query88.sql │ │ ├── query89.sql │ │ ├── query9.sql │ │ ├── query90.sql │ │ ├── query91.sql │ │ ├── 
query92.sql │ │ ├── query93.sql │ │ ├── query94.sql │ │ ├── query95.sql │ │ ├── query96.sql │ │ ├── query97.sql │ │ ├── query98.sql │ │ └── query99.sql └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── microsoft │ │ │ └── lst_bench │ │ │ ├── Driver.java │ │ │ ├── client │ │ │ ├── ClientException.java │ │ │ ├── Connection.java │ │ │ ├── ConnectionManager.java │ │ │ ├── JDBCConnection.java │ │ │ ├── JDBCConnectionManager.java │ │ │ ├── QueryResult.java │ │ │ ├── SparkConnection.java │ │ │ └── SparkConnectionManager.java │ │ │ ├── common │ │ │ ├── BenchmarkConfig.java │ │ │ ├── BenchmarkRunnable.java │ │ │ ├── LSTBenchmarkExecutor.java │ │ │ └── SessionExecutor.java │ │ │ ├── exec │ │ │ ├── FileExec.java │ │ │ ├── PhaseExec.java │ │ │ ├── SessionExec.java │ │ │ ├── StatementExec.java │ │ │ ├── TaskExec.java │ │ │ └── WorkloadExec.java │ │ │ ├── input │ │ │ ├── BenchmarkObjectFactory.java │ │ │ ├── Library.java │ │ │ ├── Phase.java │ │ │ ├── PhaseTemplate.java │ │ │ ├── Session.java │ │ │ ├── SessionTemplate.java │ │ │ ├── Task.java │ │ │ ├── TaskTemplate.java │ │ │ ├── TasksSequence.java │ │ │ ├── Workload.java │ │ │ └── config │ │ │ │ ├── ConnectionConfig.java │ │ │ │ ├── ConnectionsConfig.java │ │ │ │ ├── ExperimentConfig.java │ │ │ │ ├── JDBCConnectionConfig.java │ │ │ │ ├── SparkConnectionConfig.java │ │ │ │ └── TelemetryConfig.java │ │ │ ├── sql │ │ │ └── SQLParser.java │ │ │ ├── task │ │ │ ├── TaskExecutor.java │ │ │ ├── custom │ │ │ │ └── DependentTaskExecutor.java │ │ │ └── util │ │ │ │ └── TaskExecutorArguments.java │ │ │ ├── telemetry │ │ │ ├── EventException.java │ │ │ ├── EventInfo.java │ │ │ ├── SQLTelemetryRegistry.java │ │ │ └── TelemetryHook.java │ │ │ └── util │ │ │ ├── DateTimeFormatter.java │ │ │ ├── FileParser.java │ │ │ ├── StringUtils.java │ │ │ └── TaskExecutorArgumentsParser.java │ └── resources │ │ ├── config │ │ ├── spark │ │ │ ├── sample_connections_config.yaml │ │ │ ├── sample_experiment_config.yaml │ │ │ └── sample_telemetry_config.yaml │ │ └── trino │ │ │ ├── sample_connections_config.yaml │ │ │ ├── sample_experiment_config-delta.yaml │ │ │ ├── sample_experiment_config-hudi.yaml │ │ │ ├── sample_experiment_config-iceberg.yaml │ │ │ └── sample_telemetry_config.yaml │ │ ├── log4j2.xml │ │ ├── schemas │ │ ├── connections_config.json │ │ ├── experiment_config.json │ │ ├── instance.json │ │ ├── library.json │ │ ├── telemetry_config.json │ │ ├── template.json │ │ └── workload.json │ │ └── scripts │ │ └── logging │ │ ├── duckdb │ │ ├── ddl.sql │ │ └── insert.sql │ │ └── spark │ │ ├── ddl.sql │ │ └── insert.sql │ └── test │ ├── java │ └── com │ │ └── microsoft │ │ └── lst_bench │ │ ├── DriverSparkTest.java │ │ ├── client │ │ └── QueryResultTest.java │ │ ├── common │ │ └── LSTBenchmarkExecutorTest.java │ │ ├── exec │ │ └── SessionExecTest.java │ │ ├── input │ │ ├── ParserTest.java │ │ └── ValidationTest.java │ │ └── task │ │ └── TaskExecutorArgumentsTest.java │ └── resources │ ├── config │ ├── samples │ │ ├── connections_config_test0.yaml │ │ ├── incorrect_telemetry_config_test0.yaml │ │ ├── incorrect_telemetry_config_test1.yaml │ │ ├── library_0.yaml │ │ └── library_retry.yaml │ └── spark │ │ ├── experiment_config-delta-skip-faulty.yaml │ │ ├── experiment_config-delta.yaml │ │ ├── experiment_config-hudi.yaml │ │ ├── experiment_config-iceberg.yaml │ │ ├── jdbc_connection_config.yaml │ │ ├── simplified_library.yaml │ │ ├── spark_connection_config-delta.yaml │ │ ├── spark_connection_config-hudi.yaml │ │ ├── spark_connection_config-iceberg.yaml │ │ ├── 
telemetry_config.yaml │ │ ├── w_all_tpcds-delta.yaml │ │ ├── w_all_tpcds-hudi.yaml │ │ ├── w_all_tpcds-iceberg.yaml │ │ ├── w_all_tpcds_single_session-delta.yaml │ │ ├── w_all_tpcds_single_session-hudi.yaml │ │ ├── w_all_tpcds_single_session-iceberg.yaml │ │ ├── w_all_tpcds_single_session_jdbc-delta.yaml │ │ ├── w_all_tpcds_single_session_jdbc-hudi.yaml │ │ ├── w_all_tpcds_single_session_jdbc-iceberg.yaml │ │ ├── w_all_tpch-delta.yaml │ │ ├── w_all_tpch-hudi.yaml │ │ ├── w_all_tpch-iceberg.yaml │ │ ├── w_faulty_query_test.yaml │ │ ├── w_faulty_query_test2.yaml │ │ ├── w_multi_connection-delta.yaml │ │ ├── w_multi_connection-hudi.yaml │ │ ├── w_multi_connection-iceberg.yaml │ │ └── w_retry_query_test.yaml │ └── scripts │ ├── faulty_test_query.sql │ └── retry_test_query.sql ├── docs ├── 20240609-LSTBench-DBTest24.pdf └── workloads.md ├── launcher.ps1 ├── launcher.sh ├── mvnw ├── mvnw.cmd └── pom.xml /.github/scripts/spark/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker exec -i -u root $SPARK_CONTAINER_ID /bin/bash -c '/bin/bash -s' <= date '${param1}' 13 | and l_shipdate < date '${param1}' + interval '1' month; 14 | -------------------------------------------------------------------------------- /adapters/cab-converter/sql/spark-3.3.1/run/query_15.sql: -------------------------------------------------------------------------------- 1 | with revenue(supplier_no, total_revenue) as ( 2 | select 3 | l_suppkey, 4 | sum(l_extendedprice * (1 - l_discount)) 5 | from 6 | ${catalog}.${database}${stream_num}.lineitem 7 | where 8 | l_shipdate >= date '${param1}' 9 | and l_shipdate < date '${param1}' + interval '3' month 10 | group by 11 | l_suppkey) 12 | select 13 | s_suppkey, 14 | s_name, 15 | s_address, 16 | s_phone, 17 | total_revenue 18 | from 19 | ${catalog}.${database}${stream_num}.supplier, 20 | revenue 21 | where 22 | s_suppkey = supplier_no 23 | and total_revenue = ( 24 | select 25 | max(total_revenue) 26 | from 27 | revenue 28 | ) 29 | order by 30 | s_suppkey; 31 | -------------------------------------------------------------------------------- /adapters/cab-converter/sql/spark-3.3.1/run/query_17.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l_extendedprice) / 7.0 as avg_yearly 3 | from 4 | ${catalog}.${database}${stream_num}.lineitem, 5 | ${catalog}.${database}${stream_num}.part 6 | where 7 | p_partkey = l_partkey 8 | and p_brand = '${param1}' 9 | and p_container = '${param2}' 10 | and l_quantity < ( 11 | select 12 | 0.2 * avg(l_quantity) 13 | from 14 | ${catalog}.${database}${stream_num}.lineitem 15 | where 16 | l_partkey = p_partkey 17 | ); 18 | -------------------------------------------------------------------------------- /adapters/cab-converter/sql/spark-3.3.1/run/query_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | l_orderkey, sum(l_extendedprice*(1-l_discount)) as revenue, o_orderdate, o_shippriority 3 | FROM 4 | ${catalog}.${database}${stream_num}.customer, 5 | ${catalog}.${database}${stream_num}.orders, 6 | ${catalog}.${database}${stream_num}.lineitem 7 | WHERE 8 | c_mktsegment = '${param1}' 9 | and c_custkey = o_custkey 10 | and l_orderkey = o_orderkey 11 | and o_orderdate < date '${param2}' 12 | and l_shipdate > date '${param2}' 13 | GROUP BY 14 | l_orderkey, 15 | o_orderdate, 16 | o_shippriority 17 | ORDER BY 18 | revenue DESC, 19 | o_orderdate 20 | LIMIT 10; 21 | 
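
Note on the cab-converter run queries above (query_15.sql, query_17.sql, query_3.sql, and the rest of the run/ directory): they are parameterized templates rather than directly executable statements. The ${catalog}, ${database}, ${stream_num}, and ${paramN} placeholders are filled in at run time with per-stream parameter values (see, e.g., the query_stream_*.json test resources), and ${database}${stream_num} concatenates into a per-stream schema name. As a purely illustrative sketch, with hypothetical catalog/schema names and made-up parameter values, the query_3.sql template would resolve to an ordinary Spark SQL statement along these lines:

-- Illustrative instantiation of the query_3.sql template above; the identifiers and
-- parameter values below are hypothetical (the real ones come from the generated
-- query streams): catalog = spark_catalog, database = cab_db, stream_num = 1,
-- param1 = 'BUILDING', param2 = '1995-03-15'.
SELECT
    l_orderkey, sum(l_extendedprice*(1-l_discount)) as revenue, o_orderdate, o_shippriority
FROM
    spark_catalog.cab_db1.customer,
    spark_catalog.cab_db1.orders,
    spark_catalog.cab_db1.lineitem
WHERE
    c_mktsegment = 'BUILDING'
    and c_custkey = o_custkey
    and l_orderkey = o_orderkey
    and o_orderdate < date '1995-03-15'
    and l_shipdate > date '1995-03-15'
GROUP BY
    l_orderkey,
    o_orderdate,
    o_shippriority
ORDER BY
    revenue DESC,
    o_orderdate
LIMIT 10;
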
--------------------------------------------------------------------------------
/adapters/cab-converter/sql/spark-3.3.1/run/query_4.sql:
--------------------------------------------------------------------------------
SELECT
    o_orderpriority,
    count(*) as order_count
FROM
    ${catalog}.${database}${stream_num}.orders
WHERE
    o_orderdate >= date '${param1}'
    AND o_orderdate < date '${param1}' + interval '3' month
    AND EXISTS (
        SELECT *
        FROM
            ${catalog}.${database}${stream_num}.lineitem
        WHERE
            l_orderkey = o_orderkey
            AND l_commitdate < l_receiptdate
    )
GROUP BY
    o_orderpriority
ORDER BY
    o_orderpriority;
--------------------------------------------------------------------------------
/adapters/cab-converter/sql/spark-3.3.1/run/query_6.sql:
--------------------------------------------------------------------------------
select
    sum(l_extendedprice * l_discount) as revenue
from ${catalog}.${database}${stream_num}.lineitem
where
    l_shipdate >= date '${param1}'
    and l_shipdate < date '${param1}' + interval '1' year
    and l_discount between (cast(${param2} as decimal(12,2)) / 100) - 0.01 and (cast(${param2} as decimal(12,2)) / 100) + 0.01
    and l_quantity < ${param3};
--------------------------------------------------------------------------------
/core/metrics/app/requirements.txt:
--------------------------------------------------------------------------------
altair==5.2.0
duckdb==0.9.2
pandas==2.2.0
streamlit==1.37.0
--------------------------------------------------------------------------------
/core/metrics/notebooks/requirements.txt:
--------------------------------------------------------------------------------
jupyter==1.0.0
duckdb==0.8.0
matplotlib==3.7.1
seaborn==0.12.2
pandas==2.0.2
python_dateutil==2.8.2
azure-core==1.27.0
azure-cli-core==2.49.0
azure-mgmt-compute==30.0.0
azure-monitor-query==1.2.0
azure-identity==1.16.1
--------------------------------------------------------------------------------
/core/run/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat:
--------------------------------------------------------------------------------
stream_num|
000001|
000002|
000003|
000004|
000005|
000006|
000007|
000008|
000009|
000010|
000011|
000012|
000013|
000014|
000015|
000016|
000017|
000018|
000019|
000020|
--------------------------------------------------------------------------------
/core/run/auxiliary/tpch/data_maintenance/parameter_values.dat:
--------------------------------------------------------------------------------
stream_num|
000001|
000002|
000003|
000004|
000005|
000006|
000007|
000008|
--------------------------------------------------------------------------------
/core/run/auxiliary/tpch/setup_data_maintenance/parameter_values.dat:
--------------------------------------------------------------------------------
stream_num|
000001|
000002|
000003|
000004|
000005|
000006|
000007|
000008|
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/config/samples/sample_connections_config.yaml:
--------------------------------------------------------------------------------
# Description: Connections Configuration
---
version: 1
connections:
- id: snow_wh_small
  driver: net.snowflake.client.jdbc.SnowflakeDriver
  url: jdbc:snowflake://account-locator.snowflakecomputing.com/?user=user&password=password&warehouse=wh_small
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/config/samples/sample_telemetry_config.yaml:
--------------------------------------------------------------------------------
# Description: Telemetry Configuration
---
version: 1
connection:
  id: duckdb_0
  driver: org.duckdb.DuckDBDriver
  url: jdbc:duckdb:./telemetry-snowflake-8.13.1
execute_ddl: true
ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql'
insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql'
# The following parameter values will be used to replace the variables in the logging statements.
parameter_values:
  data_path: ''
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_call_center.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.call_center SELECT
            *
        FROM
            ${external_catalog}.${external_database}.call_center;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_catalog_page.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.catalog_page SELECT
            *
        FROM
            ${external_catalog}.${external_database}.catalog_page;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_catalog_returns.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.catalog_returns SELECT
            *
        FROM
            ${external_catalog}.${external_database}.catalog_returns;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_catalog_sales.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.catalog_sales SELECT
            *
        FROM
            ${external_catalog}.${external_database}.catalog_sales;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_customer.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.customer SELECT
            *
        FROM
            ${external_catalog}.${external_database}.customer;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_customer_address.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.customer_address SELECT
            *
        FROM
            ${external_catalog}.${external_database}.customer_address;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_customer_demographics.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.customer_demographics SELECT
            *
        FROM
            ${external_catalog}.${external_database}.customer_demographics;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_date_dim.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.date_dim SELECT
            *
        FROM
            ${external_catalog}.${external_database}.date_dim;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_household_demographics.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.household_demographics SELECT
            *
        FROM
            ${external_catalog}.${external_database}.household_demographics;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_income_band.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.income_band SELECT
            *
        FROM
            ${external_catalog}.${external_database}.income_band;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_inventory.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.inventory SELECT
            *
        FROM
            ${external_catalog}.${external_database}.inventory;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_item.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.item SELECT
            *
        FROM
            ${external_catalog}.${external_database}.item;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_promotion.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.promotion SELECT
            *
        FROM
            ${external_catalog}.${external_database}.promotion;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_reason.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.reason SELECT
            *
        FROM
            ${external_catalog}.${external_database}.reason;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_ship_mode.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.ship_mode SELECT
            *
        FROM
            ${external_catalog}.${external_database}.ship_mode;
--------------------------------------------------------------------------------
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_store.sql:
--------------------------------------------------------------------------------
INSERT
    INTO
        ${catalog}.${database}.store SELECT
            *
        FROM
            ${external_catalog}.${external_database}.store;
--------------------------------------------------------------------------------
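
Note on the snowflake-8.13.1 TPC-DS build scripts above: every 2_load_<table>.sql follows the same pattern, copying one table from the external staging schema (created by setup/ddl-external-tables.sql and populated by setup/populate-external-tables.sql) into the table created by the matching 1_create_<table>.sql script, with ${catalog}, ${database}, ${external_catalog}, and ${external_database} bound at run time (typically from the experiment configuration's parameter values). As a purely illustrative sketch with hypothetical bindings, the store load resolves to:

-- Illustrative only; the identifiers below are hypothetical placeholders
-- (catalog = lst_bench, database = tpcds_w0, external_catalog = lst_bench,
-- external_database = tpcds_external), not values taken from this repository.
INSERT
    INTO
        lst_bench.tpcds_w0.store SELECT
            *
        FROM
            lst_bench.tpcds_external.store;
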
/core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_store_returns.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.store_returns SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.store_returns; 7 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_store_sales.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.store_sales SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.store_sales; 7 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_time_dim.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.time_dim SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.time_dim; 7 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_warehouse.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.warehouse SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.warehouse; 7 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_web_page.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.web_page SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.web_page; 7 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_web_returns.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.web_returns SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.web_returns; 7 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_web_sales.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.web_sales SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.web_sales; 7 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/2_load_web_site.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.web_site SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.web_site; 7 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_catalog_page.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.catalog_page( 4 | cp_catalog_page_sk INT, 5 | cp_catalog_page_id string, 6 | cp_start_date_sk INT, 7 | cp_end_date_sk INT, 8 | cp_department string, 9 | cp_catalog_number INT, 10 | cp_catalog_page_number INT, 11 | cp_description string, 12 | cp_type string 13 | ) 14 | CATALOG = 'SNOWFLAKE' 15 | EXTERNAL_VOLUME = '${exvol}' 16 | BASE_LOCATION = 
'${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_customer_address.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.customer_address( 4 | ca_address_sk INT, 5 | ca_address_id string, 6 | ca_street_number string, 7 | ca_street_name string, 8 | ca_street_type string, 9 | ca_suite_number string, 10 | ca_city string, 11 | ca_county string, 12 | ca_state string, 13 | ca_zip string, 14 | ca_country string, 15 | ca_gmt_offset DECIMAL( 16 | 5, 17 | 2 18 | ), 19 | ca_location_type string 20 | ) 21 | CATALOG = 'SNOWFLAKE' 22 | EXTERNAL_VOLUME = '${exvol}' 23 | BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_customer_demographics.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.customer_demographics( 4 | cd_demo_sk INT, 5 | cd_gender string, 6 | cd_marital_status string, 7 | cd_education_status string, 8 | cd_purchase_estimate INT, 9 | cd_credit_rating string, 10 | cd_dep_count INT, 11 | cd_dep_employed_count INT, 12 | cd_dep_college_count INT 13 | ) 14 | CATALOG = 'SNOWFLAKE' 15 | EXTERNAL_VOLUME = '${exvol}' 16 | BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_household_demographics.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.household_demographics( 4 | hd_demo_sk INT, 5 | hd_income_band_sk INT, 6 | hd_buy_potential string, 7 | hd_dep_count INT, 8 | hd_vehicle_count INT 9 | ) 10 | CATALOG = 'SNOWFLAKE' 11 | EXTERNAL_VOLUME = '${exvol}' 12 | BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_income_band.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.income_band( 4 | ib_income_band_sk INT, 5 | ib_lower_bound INT, 6 | ib_upper_bound INT 7 | ) 8 | CATALOG = 'SNOWFLAKE' 9 | EXTERNAL_VOLUME = '${exvol}' 10 | BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_inventory.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.inventory( 4 | inv_item_sk INT, 5 | inv_warehouse_sk INT, 6 | inv_quantity_on_hand INT, 7 | inv_date_sk INT 8 | ) 9 | CATALOG = 'SNOWFLAKE' 10 | EXTERNAL_VOLUME = '${exvol}' 11 | BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_reason.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.reason( 4 | r_reason_sk INT, 5 | r_reason_id string, 6 | r_reason_desc string 7 | ) 8 | CATALOG = 'SNOWFLAKE' 9 | EXTERNAL_VOLUME = '${exvol}' 10 | 
BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_ship_mode.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.ship_mode( 4 | sm_ship_mode_sk INT, 5 | sm_ship_mode_id string, 6 | sm_type string, 7 | sm_code string, 8 | sm_carrier string, 9 | sm_contract string 10 | ) 11 | CATALOG = 'SNOWFLAKE' 12 | EXTERNAL_VOLUME = '${exvol}' 13 | BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_time_dim.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.time_dim( 4 | t_time_sk INT, 5 | t_time_id string, 6 | t_time INT, 7 | t_hour INT, 8 | t_minute INT, 9 | t_second INT, 10 | t_am_pm string, 11 | t_shift string, 12 | t_sub_shift string, 13 | t_meal_time string 14 | ) 15 | CATALOG = 'SNOWFLAKE' 16 | EXTERNAL_VOLUME = '${exvol}' 17 | BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_warehouse.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.warehouse( 4 | w_warehouse_sk INT, 5 | w_warehouse_id string, 6 | w_warehouse_name string, 7 | w_warehouse_sq_ft INT, 8 | w_street_number string, 9 | w_street_name string, 10 | w_street_type string, 11 | w_suite_number string, 12 | w_city string, 13 | w_county string, 14 | w_state string, 15 | w_zip string, 16 | w_country string, 17 | w_gmt_offset DECIMAL( 18 | 5, 19 | 2 20 | ) 21 | ) 22 | CATALOG = 'SNOWFLAKE' 23 | EXTERNAL_VOLUME = '${exvol}' 24 | BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/iceberg_tables/1_create_web_page.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | ICEBERG TABLE 3 | ${catalog}.${database}.web_page( 4 | wp_web_page_sk INT, 5 | wp_web_page_id string, 6 | wp_rec_start_date DATE, 7 | wp_rec_end_date DATE, 8 | wp_creation_date_sk INT, 9 | wp_access_date_sk INT, 10 | wp_autogen_flag string, 11 | wp_customer_sk INT, 12 | wp_url string, 13 | wp_type string, 14 | wp_char_count INT, 15 | wp_link_count INT, 16 | wp_image_count INT, 17 | wp_max_ad_count INT 18 | ) 19 | CATALOG = 'SNOWFLAKE' 20 | EXTERNAL_VOLUME = '${exvol}' 21 | BASE_LOCATION = '${base_location}'; -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_catalog_page.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.catalog_page( 4 | cp_catalog_page_sk INT, 5 | cp_catalog_page_id VARCHAR(16), 6 | cp_start_date_sk INT, 7 | cp_end_date_sk INT, 8 | cp_department VARCHAR(50), 9 | cp_catalog_number INT, 10 | cp_catalog_page_number INT, 11 | cp_description VARCHAR(100), 12 | cp_type VARCHAR(100) 13 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_customer_address.sql: 
-------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.customer_address( 4 | ca_address_sk INT, 5 | ca_address_id VARCHAR(16), 6 | ca_street_number VARCHAR(10), 7 | ca_street_name VARCHAR(60), 8 | ca_street_type VARCHAR(15), 9 | ca_suite_number VARCHAR(10), 10 | ca_city VARCHAR(60), 11 | ca_county VARCHAR(30), 12 | ca_state VARCHAR(2), 13 | ca_zip VARCHAR(10), 14 | ca_country VARCHAR(20), 15 | ca_gmt_offset DECIMAL( 16 | 5, 17 | 2 18 | ), 19 | ca_location_type VARCHAR(20) 20 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_customer_demographics.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.customer_demographics( 4 | cd_demo_sk INT, 5 | cd_gender VARCHAR(1), 6 | cd_marital_status VARCHAR(1), 7 | cd_education_status VARCHAR(20), 8 | cd_purchase_estimate INT, 9 | cd_credit_rating VARCHAR(10), 10 | cd_dep_count INT, 11 | cd_dep_employed_count INT, 12 | cd_dep_college_count INT 13 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_household_demographics.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.household_demographics( 4 | hd_demo_sk INT, 5 | hd_income_band_sk INT, 6 | hd_buy_potential VARCHAR(15), 7 | hd_dep_count INT, 8 | hd_vehicle_count INT 9 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_income_band.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.income_band( 4 | ib_income_band_sk INT, 5 | ib_lower_bound INT, 6 | ib_upper_bound INT 7 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_inventory.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.inventory( 4 | inv_item_sk INT, 5 | inv_warehouse_sk INT, 6 | inv_quantity_on_hand INT, 7 | inv_date_sk INT 8 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_reason.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.reason( 4 | r_reason_sk INT, 5 | r_reason_id VARCHAR(16), 6 | r_reason_desc VARCHAR(100) 7 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_ship_mode.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.ship_mode( 4 | sm_ship_mode_sk INT, 5 | sm_ship_mode_id VARCHAR(16), 6 | sm_type VARCHAR(30), 7 | sm_code VARCHAR(10), 8 | sm_carrier VARCHAR(20), 9 | sm_contract VARCHAR(20) 10 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_time_dim.sql: -------------------------------------------------------------------------------- 1 | CREATE 
2 | TABLE 3 | ${catalog}.${database}.time_dim( 4 | t_time_sk INT, 5 | t_time_id VARCHAR(16), 6 | t_time INT, 7 | t_hour INT, 8 | t_minute INT, 9 | t_second INT, 10 | t_am_pm VARCHAR(2), 11 | t_shift VARCHAR(20), 12 | t_sub_shift VARCHAR(20), 13 | t_meal_time VARCHAR(20) 14 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_warehouse.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.warehouse( 4 | w_warehouse_sk INT, 5 | w_warehouse_id VARCHAR(16), 6 | w_warehouse_name VARCHAR(20), 7 | w_warehouse_sq_ft INT, 8 | w_street_number VARCHAR(10), 9 | w_street_name VARCHAR(60), 10 | w_street_type VARCHAR(15), 11 | w_suite_number VARCHAR(10), 12 | w_city VARCHAR(60), 13 | w_county VARCHAR(30), 14 | w_state VARCHAR(2), 15 | w_zip VARCHAR(10), 16 | w_country VARCHAR(20), 17 | w_gmt_offset DECIMAL( 18 | 5, 19 | 2 20 | ) 21 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/build/native_tables/1_create_web_page.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.web_page( 4 | wp_web_page_sk INT, 5 | wp_web_page_id VARCHAR(16), 6 | wp_rec_start_date DATE, 7 | wp_rec_end_date DATE, 8 | wp_creation_date_sk INT, 9 | wp_access_date_sk INT, 10 | wp_autogen_flag VARCHAR(1), 11 | wp_customer_sk INT, 12 | wp_url VARCHAR(100), 13 | wp_type VARCHAR(50), 14 | wp_char_count INT, 15 | wp_link_count INT, 16 | wp_image_count INT, 17 | wp_max_ad_count INT 18 | ); -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/single_user/query22.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_product_name, 3 | i_brand, 4 | i_class, 5 | i_category, 6 | AVG( inv_quantity_on_hand ) qoh 7 | FROM 8 | ${catalog}.${database}.inventory ${asof_sf}, 9 | ${catalog}.${database}.date_dim, 10 | ${catalog}.${database}.item 11 | WHERE 12 | inv_date_sk = d_date_sk 13 | AND inv_item_sk = i_item_sk 14 | AND d_month_seq BETWEEN 1201 AND 1201 + 11 15 | GROUP BY 16 | ROLLUP( 17 | i_product_name, 18 | i_brand, 19 | i_class, 20 | i_category 21 | ) 22 | ORDER BY 23 | qoh, 24 | i_product_name, 25 | i_brand, 26 | i_class, 27 | i_category LIMIT 100; 28 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/single_user/query3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_brand_id brand_id, 4 | item.i_brand brand, 5 | SUM( ss_net_profit ) sum_agg 6 | FROM 7 | ${catalog}.${database}.date_dim dt, 8 | ${catalog}.${database}.store_sales ${asof_sf}, 9 | ${catalog}.${database}.item 10 | WHERE 11 | dt.d_date_sk = store_sales.ss_sold_date_sk 12 | AND store_sales.ss_item_sk = item.i_item_sk 13 | AND item.i_manufact_id = 445 14 | AND dt.d_moy = 12 15 | GROUP BY 16 | dt.d_year, 17 | item.i_brand, 18 | item.i_brand_id 19 | ORDER BY 20 | dt.d_year, 21 | sum_agg DESC, 22 | brand_id LIMIT 100; 23 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/single_user/query42.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | 
item.i_category_id, 4 | item.i_category, 5 | SUM( ss_ext_sales_price ) 6 | FROM 7 | ${catalog}.${database}.date_dim dt, 8 | ${catalog}.${database}.store_sales ${asof_sf}, 9 | ${catalog}.${database}.item 10 | WHERE 11 | dt.d_date_sk = store_sales.ss_sold_date_sk 12 | AND store_sales.ss_item_sk = item.i_item_sk 13 | AND item.i_manager_id = 1 14 | AND dt.d_moy = 11 15 | AND dt.d_year = 1998 16 | GROUP BY 17 | dt.d_year, 18 | item.i_category_id, 19 | item.i_category 20 | ORDER BY 21 | SUM( ss_ext_sales_price ) DESC, 22 | dt.d_year, 23 | item.i_category_id, 24 | item.i_category LIMIT 100; 25 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/single_user/query52.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_brand_id brand_id, 4 | item.i_brand brand, 5 | SUM( ss_ext_sales_price ) ext_price 6 | FROM 7 | ${catalog}.${database}.date_dim dt, 8 | ${catalog}.${database}.store_sales ${asof_sf}, 9 | ${catalog}.${database}.item 10 | WHERE 11 | dt.d_date_sk = store_sales.ss_sold_date_sk 12 | AND store_sales.ss_item_sk = item.i_item_sk 13 | AND item.i_manager_id = 1 14 | AND dt.d_moy = 11 15 | AND dt.d_year = 2000 16 | GROUP BY 17 | dt.d_year, 18 | item.i_brand, 19 | item.i_brand_id 20 | ORDER BY 21 | dt.d_year, 22 | ext_price DESC, 23 | brand_id LIMIT 100; 24 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/single_user/query55.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_brand_id brand_id, 3 | i_brand brand, 4 | SUM( ss_ext_sales_price ) ext_price 5 | FROM 6 | ${catalog}.${database}.date_dim, 7 | ${catalog}.${database}.store_sales ${asof_sf}, 8 | ${catalog}.${database}.item 9 | WHERE 10 | d_date_sk = ss_sold_date_sk 11 | AND ss_item_sk = i_item_sk 12 | AND i_manager_id = 20 13 | AND d_moy = 12 14 | AND d_year = 1998 15 | GROUP BY 16 | i_brand, 17 | i_brand_id 18 | ORDER BY 19 | ext_price DESC, 20 | i_brand_id LIMIT 100; 21 | -------------------------------------------------------------------------------- /core/run/snowflake-8.13.1/scripts/tpcds/single_user/query96.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | COUNT(*) 3 | FROM 4 | ${catalog}.${database}.store_sales ${asof_sf}, 5 | ${catalog}.${database}.household_demographics, 6 | ${catalog}.${database}.time_dim, 7 | ${catalog}.${database}.store 8 | WHERE 9 | ss_sold_time_sk = time_dim.t_time_sk 10 | AND ss_hdemo_sk = household_demographics.hd_demo_sk 11 | AND ss_store_sk = s_store_sk 12 | AND time_dim.t_hour = 8 13 | AND time_dim.t_minute >= 30 14 | AND household_demographics.hd_dep_count = 5 15 | AND store.s_store_name = 'ese' 16 | ORDER BY 17 | COUNT(*) LIMIT 100; 18 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/config/connections_config.yaml: -------------------------------------------------------------------------------- 1 | # Description: Connections Configuration 2 | --- 3 | version: 1 4 | connections: 5 | - id: spark_0 6 | driver: org.apache.hive.jdbc.HiveDriver 7 | url: jdbc:hive2://${SPARK_MASTER_HOST}:10000 8 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml: -------------------------------------------------------------------------------- 1 | 
# Description: Telemetry Configuration 2 | --- 3 | version: 1 4 | connection: 5 | id: duckdb_0 6 | driver: org.duckdb.DuckDBDriver 7 | url: jdbc:duckdb:./telemetry-spark-3.3.1 8 | execute_ddl: true 9 | ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql' 10 | insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql' 11 | # The following parameter values will be used to replace the variables in the logging statements. 12 | parameter_values: 13 | data_path: '' -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/sh/cleanup-delta-2.2.0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${SPARK_HOME}" ]; then 4 | echo "ERROR: SPARK_HOME is not defined." 5 | exit 1 6 | fi 7 | 8 | rm $SPARK_HOME/jars/delta-core.jar 9 | rm $SPARK_HOME/jars/delta-storage.jar 10 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/sh/cleanup-hudi-0.12.2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${SPARK_HOME}" ]; then 4 | echo "ERROR: SPARK_HOME is not defined." 5 | exit 1 6 | fi 7 | 8 | rm $SPARK_HOME/jars/hudi-spark-bundle.jar 9 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/sh/cleanup-iceberg-1.1.0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${SPARK_HOME}" ]; then 4 | echo "ERROR: SPARK_HOME is not defined." 5 | exit 1 6 | fi 7 | 8 | rm $SPARK_HOME/jars/iceberg-spark-runtime.jar 9 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/sh/delta-2.2.0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${SPARK_HOME}" ]; then 4 | echo "ERROR: SPARK_HOME is not defined." 5 | exit 1 6 | fi 7 | 8 | wget -nv -N https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.2.0/delta-core_2.12-2.2.0.jar 9 | wget -nv -N https://repo1.maven.org/maven2/io/delta/delta-storage/2.2.0/delta-storage-2.2.0.jar 10 | 11 | ln -sf $(pwd)/delta-core_2.12-2.2.0.jar $SPARK_HOME/jars/delta-core.jar 12 | ln -sf $(pwd)/delta-storage-2.2.0.jar $SPARK_HOME/jars/delta-storage.jar 13 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/sh/dist-exec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${HOSTS}" ]; then 4 | echo "ERROR: HOSTS is not defined." 5 | exit 1 6 | fi 7 | 8 | if [ "$#" -lt 2 ]; then 9 | echo "Error: Please provide at least two input parameters." 
10 | exit 1 11 | fi 12 | deploy_dir=$1 13 | script_file=$2 14 | 15 | for node in $HOSTS ; do ssh -t $node "mkdir -p ~/$deploy_dir" ; done 16 | for node in $HOSTS ; do scp *.template $node:~/$deploy_dir ; done 17 | for node in $HOSTS ; do scp $script_file $node:~/$deploy_dir ; done 18 | for node in $HOSTS ; do ssh -t $node "cd ~/$deploy_dir && chmod +x ./$script_file && ./$script_file ${@:3}" ; done 19 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/sh/hudi-0.12.2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${SPARK_HOME}" ]; then 4 | echo "ERROR: SPARK_HOME is not defined." 5 | exit 1 6 | fi 7 | 8 | wget -nv -N https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark3.3-bundle_2.12/0.12.2/hudi-spark3.3-bundle_2.12-0.12.2.jar 9 | 10 | ln -sf $(pwd)/hudi-spark3.3-bundle_2.12-0.12.2.jar $SPARK_HOME/jars/hudi-spark-bundle.jar 11 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/sh/iceberg-1.1.0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${SPARK_HOME}" ]; then 4 | echo "ERROR: SPARK_HOME is not defined." 5 | exit 1 6 | fi 7 | 8 | wget -nv -N https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.1.0/iceberg-spark-runtime-3.3_2.12-1.1.0.jar 9 | 10 | ln -sf $(pwd)/iceberg-spark-runtime-3.3_2.12-1.1.0.jar $SPARK_HOME/jars/iceberg-spark-runtime.jar 11 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/sh/spark-env.sh.template: -------------------------------------------------------------------------------- 1 | SPARK_MASTER_HOST=$SPARK_MASTER_HOST 2 | JAVA_HOME=$JAVA_HOME -------------------------------------------------------------------------------- /core/run/spark-3.3.1/azure-pipelines/sh/stop-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${SPARK_HOME}" ]; then 4 | echo "ERROR: SPARK_HOME is not defined." 
5 | exit 1 6 | fi 7 | 8 | cd $SPARK_HOME 9 | 10 | echo "Stopping thrift server" 11 | ./sbin/stop-thriftserver.sh 12 | 13 | echo "Stopping history server" 14 | ./sbin/stop-history-server.sh 15 | 16 | echo "Stopping spark cluster" 17 | ./sbin/stop-all.sh -------------------------------------------------------------------------------- /core/run/spark-3.3.1/results/spark-3.3.1-2024-02-01-8xStandard_E8s_v5.duckdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lst-bench/8e8c8592d4763c2dd58d7e28e78e6dd1dada0a8e/core/run/spark-3.3.1/results/spark-3.3.1-2024-02-01-8xStandard_E8s_v5.duckdb -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_page.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.catalog_page( 4 | cp_catalog_page_sk INT, 5 | cp_catalog_page_id VARCHAR(16), 6 | cp_start_date_sk INT, 7 | cp_end_date_sk INT, 8 | cp_department VARCHAR(50), 9 | cp_catalog_number INT, 10 | cp_catalog_page_number INT, 11 | cp_description VARCHAR(100), 12 | cp_type VARCHAR(100) 13 | ) 14 | USING ${table_format} OPTIONS( 15 | PATH '${data_path}${experiment_start_time}/${repetition}/catalog_page/' 16 | ) TBLPROPERTIES( 17 | 'primaryKey' = 'cp_catalog_page_sk' ${tblproperties_suffix} 18 | ); 19 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/1_create_customer_demographics.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.customer_demographics( 4 | cd_demo_sk INT, 5 | cd_gender VARCHAR(1), 6 | cd_marital_status VARCHAR(1), 7 | cd_education_status VARCHAR(20), 8 | cd_purchase_estimate INT, 9 | cd_credit_rating VARCHAR(10), 10 | cd_dep_count INT, 11 | cd_dep_employed_count INT, 12 | cd_dep_college_count INT 13 | ) 14 | USING ${table_format} OPTIONS( 15 | PATH '${data_path}${experiment_start_time}/${repetition}/customer_demographics/' 16 | ) TBLPROPERTIES( 17 | 'primaryKey' = 'cd_demo_sk' ${tblproperties_suffix} 18 | ); 19 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/1_create_household_demographics.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.household_demographics( 4 | hd_demo_sk INT, 5 | hd_income_band_sk INT, 6 | hd_buy_potential VARCHAR(15), 7 | hd_dep_count INT, 8 | hd_vehicle_count INT 9 | ) 10 | USING ${table_format} OPTIONS( 11 | PATH '${data_path}${experiment_start_time}/${repetition}/household_demographics/' 12 | ) TBLPROPERTIES( 13 | 'primaryKey' = 'hd_demo_sk' ${tblproperties_suffix} 14 | ); 15 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/1_create_income_band.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.income_band( 4 | ib_income_band_sk INT, 5 | ib_lower_bound INT, 6 | ib_upper_bound INT 7 | ) 8 | USING ${table_format} OPTIONS( 9 | PATH '${data_path}${experiment_start_time}/${repetition}/income_band/' 10 | ) TBLPROPERTIES( 11 | 'primaryKey' = 'ib_income_band_sk' ${tblproperties_suffix} 12 | ); 13 | 
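For reference, the Spark build DDL above is a template: the runner substitutes ${catalog}, ${database}, ${table_format}, ${data_path}, ${experiment_start_time}, ${repetition}, and ${tblproperties_suffix} before execution. A minimal sketch of one resolved statement, assuming purely illustrative values (spark_catalog, a tpcds database, Delta as the table format, and a made-up abfss path) that are not taken from this repository:

-- Hypothetical substitution; every concrete value below is an assumption for illustration.
CREATE TABLE spark_catalog.tpcds.income_band(
    ib_income_band_sk INT,
    ib_lower_bound INT,
    ib_upper_bound INT
)
USING delta OPTIONS(
    PATH 'abfss://data@exampleaccount.dfs.core.windows.net/lst-bench/2024-02-01/0/income_band/'
) TBLPROPERTIES(
    'primaryKey' = 'ib_income_band_sk'
);

When ${tblproperties_suffix} is non-empty, it would simply append further key/value pairs inside the TBLPROPERTIES clause.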
-------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/1_create_inventory.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.inventory( 4 | inv_item_sk INT, 5 | inv_warehouse_sk INT, 6 | inv_quantity_on_hand INT, 7 | inv_date_sk INT 8 | ) 9 | USING ${table_format} OPTIONS( 10 | PATH '${data_path}${experiment_start_time}/${repetition}/inventory/' 11 | ) PARTITIONED BY(inv_date_sk) TBLPROPERTIES( 12 | 'primaryKey' = 'inv_date_sk,inv_item_sk,inv_warehouse_sk' ${tblproperties_suffix} 13 | ); 14 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/1_create_reason.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.reason( 4 | r_reason_sk INT, 5 | r_reason_id VARCHAR(16), 6 | r_reason_desc VARCHAR(100) 7 | ) 8 | USING ${table_format} OPTIONS( 9 | PATH '${data_path}${experiment_start_time}/${repetition}/reason/' 10 | ) TBLPROPERTIES( 11 | 'primaryKey' = 'r_reason_sk' ${tblproperties_suffix} 12 | ); 13 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/1_create_ship_mode.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.ship_mode( 4 | sm_ship_mode_sk INT, 5 | sm_ship_mode_id VARCHAR(16), 6 | sm_type VARCHAR(30), 7 | sm_code VARCHAR(10), 8 | sm_carrier VARCHAR(20), 9 | sm_contract VARCHAR(20) 10 | ) 11 | USING ${table_format} OPTIONS( 12 | PATH '${data_path}${experiment_start_time}/${repetition}/ship_mode/' 13 | ) TBLPROPERTIES( 14 | 'primaryKey' = 'sm_ship_mode_sk' ${tblproperties_suffix} 15 | ); 16 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/1_create_time_dim.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.time_dim( 4 | t_time_sk INT, 5 | t_time_id VARCHAR(16), 6 | t_time INT, 7 | t_hour INT, 8 | t_minute INT, 9 | t_second INT, 10 | t_am_pm VARCHAR(2), 11 | t_shift VARCHAR(20), 12 | t_sub_shift VARCHAR(20), 13 | t_meal_time VARCHAR(20) 14 | ) 15 | USING ${table_format} OPTIONS( 16 | PATH '${data_path}${experiment_start_time}/${repetition}/time_dim/' 17 | ) TBLPROPERTIES( 18 | 'primaryKey' = 't_time_sk' ${tblproperties_suffix} 19 | ); 20 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_call_center.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.call_center SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.call_center; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_page.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.catalog_page SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.catalog_page; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_returns.sql: 
-------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.catalog_returns SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.catalog_returns; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_sales.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.catalog_sales SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.catalog_sales; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_customer.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.customer SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.customer; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_customer_address.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.customer_address SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.customer_address; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_customer_demographics.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.customer_demographics SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.customer_demographics; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_date_dim.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.date_dim SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.date_dim; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_household_demographics.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.household_demographics SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.household_demographics; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_income_band.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.income_band SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.income_band; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_inventory.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.inventory SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.inventory; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_item.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.item SELECT 4 | * 5 | FROM 
6 | ${external_catalog}.${external_database}.item; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_promotion.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.promotion SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.promotion; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_reason.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.reason SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.reason; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_ship_mode.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.ship_mode SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.ship_mode; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_store.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.store SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.store; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_store_returns.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.store_returns SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.store_returns; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_store_sales.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.store_sales SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.store_sales; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_time_dim.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.time_dim SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.time_dim; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_warehouse.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.warehouse SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.warehouse; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_web_page.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.web_page SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.web_page; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_web_returns.sql: 
-------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.web_returns SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.web_returns; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_web_sales.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.web_sales SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.web_sales; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/2_load_web_site.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.web_site SELECT 4 | * 5 | FROM 6 | ${external_catalog}.${external_database}.web_site; 7 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_call_center.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.call_center COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_page.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.catalog_page COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_returns.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.catalog_returns COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_sales.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.catalog_sales COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.customer COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer_address.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.customer_address COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer_demographics.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.customer_demographics COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_date_dim.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | 
${catalog}.${database}.date_dim COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_household_demographics.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.household_demographics COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_income_band.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.income_band COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_inventory.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.inventory COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_item.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.item COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_promotion.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.promotion COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_reason.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.reason COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_ship_mode.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.ship_mode COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_store.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.store COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_store_returns.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.store_returns COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_store_sales.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.store_sales COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_time_dim.sql: 
-------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.time_dim COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_warehouse.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.warehouse COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_page.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.web_page COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_returns.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.web_returns COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_sales.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.web_sales COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_site.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.web_site COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_1.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cs_order_number as cr_order_number 3 | FROM 4 | ${catalog}.${database}.catalog_sales, 5 | ${catalog}.${database}.date_dim 6 | WHERE 7 | cs_sold_date_sk = d_date_sk 8 | AND d_date BETWEEN '${param1}' AND '${param2}'; 9 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cs_order_number as cr_order_number 3 | FROM 4 | ${catalog}.${database}.catalog_sales, 5 | ${catalog}.${database}.date_dim 6 | WHERE 7 | cs_sold_date_sk = d_date_sk 8 | AND d_date BETWEEN '${param3}' AND '${param4}'; 9 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cs_order_number as cr_order_number 3 | FROM 4 | ${catalog}.${database}.catalog_sales, 5 | ${catalog}.${database}.date_dim 6 | WHERE 7 | cs_sold_date_sk = d_date_sk 8 | AND d_date BETWEEN '${param5}' AND '${param6}'; 9 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_delete.sql: -------------------------------------------------------------------------------- 1 | DELETE FROM ${catalog}.${database}.catalog_returns 2 | WHERE cr_order_number IN 
(${cr_order_number}); 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_1.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cs_item_sk, cs_order_number 3 | FROM 4 | ${catalog}.${database}.catalog_sales 5 | WHERE 6 | cs_sold_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param1}' AND '${param2}' 13 | ) 14 | AND cs_sold_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param1}' AND '${param2}' 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cs_item_sk, cs_order_number 3 | FROM 4 | ${catalog}.${database}.catalog_sales 5 | WHERE 6 | cs_sold_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param3}' AND '${param4}' 13 | ) 14 | AND cs_sold_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param3}' AND '${param4}' 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cs_item_sk, cs_order_number 3 | FROM 4 | ${catalog}.${database}.catalog_sales 5 | WHERE 6 | cs_sold_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param5}' AND '${param6}' 13 | ) 14 | AND cs_sold_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param5}' AND '${param6}' 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_delete.sql: -------------------------------------------------------------------------------- 1 | DELETE FROM ${catalog}.${database}.catalog_sales 2 | WHERE (cs_item_sk, cs_order_number) IN (${multi_values_clause}); 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_1.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | inv_date_sk, inv_item_sk, inv_warehouse_sk 3 | FROM 4 | ${catalog}.${database}.inventory 5 | WHERE 6 | inv_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param7}' AND '${param8}' 13 | ) 14 | AND inv_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param7}' AND '${param8}' 21 | ); -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | inv_date_sk, inv_item_sk, inv_warehouse_sk 3 | FROM 4 | ${catalog}.${database}.inventory 5 | WHERE 6 | inv_date_sk >=( 7 | SELECT 8 | MIN( 
d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param9}' AND '${param10}' 13 | ) 14 | AND inv_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param9}' AND '${param10}' 21 | ); -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | inv_date_sk, inv_item_sk, inv_warehouse_sk 3 | FROM 4 | ${catalog}.${database}.inventory 5 | WHERE 6 | inv_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param11}' AND '${param12}' 13 | ) 14 | AND inv_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param11}' AND '${param12}' 21 | ); -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_delete.sql: -------------------------------------------------------------------------------- 1 | DELETE FROM ${catalog}.${database}.inventory 2 | WHERE (inv_date_sk, inv_item_sk, inv_warehouse_sk) IN (${multi_values_clause}); 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_1.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sr_item_sk, sr_ticket_number 3 | FROM 4 | ${catalog}.${database}.store_returns 5 | WHERE 6 | sr_ticket_number IN( 7 | SELECT 8 | ss_ticket_number 9 | FROM 10 | ${catalog}.${database}.store_sales, 11 | ${catalog}.${database}.date_dim 12 | WHERE 13 | ss_sold_date_sk = d_date_sk 14 | AND d_date BETWEEN '${param1}' AND '${param2}' 15 | ); -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sr_item_sk, sr_ticket_number 3 | FROM 4 | ${catalog}.${database}.store_returns 5 | WHERE 6 | sr_ticket_number IN( 7 | SELECT 8 | ss_ticket_number 9 | FROM 10 | ${catalog}.${database}.store_sales, 11 | ${catalog}.${database}.date_dim 12 | WHERE 13 | ss_sold_date_sk = d_date_sk 14 | AND d_date BETWEEN '${param3}' AND '${param4}' 15 | ); 16 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sr_item_sk, sr_ticket_number 3 | FROM 4 | ${catalog}.${database}.store_returns 5 | WHERE 6 | sr_ticket_number IN( 7 | SELECT 8 | ss_ticket_number 9 | FROM 10 | ${catalog}.${database}.store_sales, 11 | ${catalog}.${database}.date_dim 12 | WHERE 13 | ss_sold_date_sk = d_date_sk 14 | AND d_date BETWEEN '${param5}' AND '${param6}' 15 | ); 16 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_delete.sql: -------------------------------------------------------------------------------- 1 | DELETE FROM ${catalog}.${database}.store_returns 2 | WHERE (sr_item_sk, sr_ticket_number) IN (${multi_values_clause}); 3 | 
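These data_maintenance_dependent scripts come in pairs: a DF_*_1/2/3 query first selects the key tuples that fall in a parameterized date range, and the corresponding DF_*_delete statement then removes exactly those rows once the runner splices the collected tuples into ${multi_values_clause}. A minimal sketch of the resolved store_returns delete, assuming the hypothetical spark_catalog.tpcds names and invented key values used purely for illustration:

-- Hypothetical expansion of ${multi_values_clause}; object names and tuples below are made up.
DELETE FROM spark_catalog.tpcds.store_returns
WHERE (sr_item_sk, sr_ticket_number) IN (
    (1021, 5000001),
    (1187, 5000002),
    (2045, 5000417)
);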
-------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_1.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ss_item_sk, ss_ticket_number 3 | FROM 4 | ${catalog}.${database}.store_sales 5 | WHERE 6 | ss_sold_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param1}' AND '${param2}' 13 | ) 14 | AND ss_sold_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param1}' AND '${param2}' 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ss_item_sk, ss_ticket_number 3 | FROM 4 | ${catalog}.${database}.store_sales 5 | WHERE 6 | ss_sold_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param3}' AND '${param4}' 13 | ) 14 | AND ss_sold_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param3}' AND '${param4}' 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ss_item_sk, ss_ticket_number 3 | FROM 4 | ${catalog}.${database}.store_sales 5 | WHERE 6 | ss_sold_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param5}' AND '${param6}' 13 | ) 14 | AND ss_sold_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param5}' AND '${param6}' 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_delete.sql: -------------------------------------------------------------------------------- 1 | DELETE FROM ${catalog}.${database}.store_sales 2 | WHERE (ss_item_sk, ss_ticket_number) IN (${multi_values_clause}); 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_1.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | wr_item_sk, wr_order_number 3 | FROM 4 | ${catalog}.${database}.web_returns 5 | WHERE 6 | wr_order_number IN( 7 | SELECT 8 | ws_order_number 9 | FROM 10 | ${catalog}.${database}.web_sales, 11 | ${catalog}.${database}.date_dim 12 | WHERE 13 | ws_sold_date_sk = d_date_sk 14 | AND d_date BETWEEN '${param1}' AND '${param2}' 15 | ); 16 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | wr_item_sk, wr_order_number 3 | FROM 4 | ${catalog}.${database}.web_returns 5 | WHERE 6 | wr_order_number IN( 7 | SELECT 8 | ws_order_number 9 | FROM 10 | ${catalog}.${database}.web_sales, 11 | ${catalog}.${database}.date_dim 12 | WHERE 13 | ws_sold_date_sk = d_date_sk 14 | 
AND d_date BETWEEN '${param3}' AND '${param4}' 15 | ); 16 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | wr_item_sk, wr_order_number 3 | FROM 4 | ${catalog}.${database}.web_returns 5 | WHERE 6 | wr_order_number IN( 7 | SELECT 8 | ws_order_number 9 | FROM 10 | ${catalog}.${database}.web_sales, 11 | ${catalog}.${database}.date_dim 12 | WHERE 13 | ws_sold_date_sk = d_date_sk 14 | AND d_date BETWEEN '${param5}' AND '${param6}' 15 | ); 16 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_delete.sql: -------------------------------------------------------------------------------- 1 | DELETE FROM ${catalog}.${database}.web_returns 2 | WHERE (wr_item_sk, wr_order_number) IN (${multi_values_clause}); 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_1.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ws_item_sk, ws_order_number 3 | FROM 4 | ${catalog}.${database}.web_sales 5 | WHERE 6 | ws_sold_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param1}' AND '${param2}' 13 | ) 14 | AND ws_sold_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param1}' AND '${param2}' 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ws_item_sk, ws_order_number 3 | FROM 4 | ${catalog}.${database}.web_sales 5 | WHERE 6 | ws_sold_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param3}' AND '${param4}' 13 | ) 14 | AND ws_sold_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param3}' AND '${param4}' 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ws_item_sk, ws_order_number 3 | FROM 4 | ${catalog}.${database}.web_sales 5 | WHERE 6 | ws_sold_date_sk >=( 7 | SELECT 8 | MIN( d_date_sk ) 9 | FROM 10 | ${catalog}.${database}.date_dim 11 | WHERE 12 | d_date BETWEEN '${param5}' AND '${param6}' 13 | ) 14 | AND ws_sold_date_sk <=( 15 | SELECT 16 | MAX( d_date_sk ) 17 | FROM 18 | ${catalog}.${database}.date_dim 19 | WHERE 20 | d_date BETWEEN '${param5}' AND '${param6}' 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_delete.sql: -------------------------------------------------------------------------------- 1 | DELETE FROM ${catalog}.${database}.web_sales 2 | WHERE (ws_item_sk, ws_order_number) IN (${multi_values_clause}); 3 | --------------------------------------------------------------------------------
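The ${paramN} placeholders in these selection queries are date-range bounds that the runner binds for each data-maintenance stream. A minimal sketch of DF_WS_1 after substitution, again assuming the hypothetical spark_catalog.tpcds names and illustrative dates that are not taken from this repository:

-- Hypothetical date values and object names, for illustration only.
SELECT
    ws_item_sk, ws_order_number
FROM
    spark_catalog.tpcds.web_sales
WHERE
    ws_sold_date_sk >= (
        SELECT MIN( d_date_sk ) FROM spark_catalog.tpcds.date_dim
        WHERE d_date BETWEEN '1999-01-02' AND '1999-01-15'
    )
    AND ws_sold_date_sk <= (
        SELECT MAX( d_date_sk ) FROM spark_catalog.tpcds.date_dim
        WHERE d_date BETWEEN '1999-01-02' AND '1999-01-15'
    );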
/core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_1.sql: -------------------------------------------------------------------------------- 1 | DROP 2 | VIEW IF EXISTS ${external_catalog}.${external_database}.crv_${stream_num}; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | row_number 3 | FROM 4 | ${external_catalog}.${external_database}.crv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_1.sql: -------------------------------------------------------------------------------- 1 | DROP 2 | VIEW IF EXISTS ${external_catalog}.${external_database}.csv_${stream_num}; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | row_number 3 | FROM 4 | ${external_catalog}.${external_database}.csv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_1.sql: -------------------------------------------------------------------------------- 1 | DROP 2 | VIEW IF EXISTS ${external_catalog}.${external_database}.iv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | row_number 3 | FROM 4 | ${external_catalog}.${external_database}.iv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_insert.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.inventory 2 | SELECT 3 | INV_ITEM_SK, 4 | INV_WAREHOUSE_SK, 5 | INV_QUANTITY_ON_HAND, 6 | INV_DATE_SK 7 | FROM 8 | ${external_catalog}.${external_database}.iv_${stream_num} 9 | WHERE row_number IN (${row_number}); -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_1.sql: -------------------------------------------------------------------------------- 1 | DROP 2 | VIEW IF EXISTS ${external_catalog}.${external_database}.srv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | row_number 3 | FROM 4 | ${external_catalog}.${external_database}.srv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_insert.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.store_returns 2 | SELECT 3 | sr_return_time_sk, 4 | sr_item_sk, 5 | sr_customer_sk, 6 | sr_cdemo_sk, 7 | sr_hdemo_sk, 8 | sr_addr_sk, 9 | sr_store_sk, 10 | sr_reason_sk, 11 | sr_ticket_number, 12 | 
sr_return_quantity, 13 | sr_return_amt, 14 | sr_return_tax, 15 | sr_return_amt_inc_tax, 16 | sr_fee, 17 | sr_return_ship_cost, 18 | sr_refunded_cash, 19 | sr_reversed_charge, 20 | sr_store_credit, 21 | sr_net_loss, 22 | sr_returned_date_sk 23 | FROM 24 | ${external_catalog}.${external_database}.srv_${stream_num} 25 | WHERE row_number IN (${row_number}); -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_1.sql: -------------------------------------------------------------------------------- 1 | DROP 2 | VIEW IF EXISTS ${external_catalog}.${external_database}.ssv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | row_number 3 | FROM 4 | ${external_catalog}.${external_database}.ssv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_1.sql: -------------------------------------------------------------------------------- 1 | DROP 2 | VIEW IF EXISTS ${external_catalog}.${external_database}.wrv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | row_number 3 | FROM 4 | ${external_catalog}.${external_database}.wrv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_1.sql: -------------------------------------------------------------------------------- 1 | DROP 2 | VIEW IF EXISTS ${external_catalog}.${external_database}.wsv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | row_number 3 | FROM 4 | ${external_catalog}.${external_database}.wsv_${stream_num}; -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.call_center; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.call_center' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.call_center'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.catalog_page; 2 | 
-------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.catalog_page' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.catalog_page'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.catalog_returns; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.catalog_returns' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.catalog_returns'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.catalog_sales; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.catalog_sales' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.catalog_sales'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_customer-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.customer; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_customer-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.customer' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_customer-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.customer'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-delta.sql: 
-------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.customer_address; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.customer_address' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.customer_address'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.customer_demographics; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.customer_demographics' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.customer_demographics'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.date_dim; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.date_dim' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.date_dim'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.household_demographics; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.household_demographics' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL 
${catalog}.system.rewrite_data_files('${database}.household_demographics'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.income_band; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.income_band' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.income_band'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.inventory; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.inventory' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.inventory'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_item-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.item; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_item-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.item' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_item-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.item'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.promotion; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.promotion' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-iceberg.sql: 
-------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.promotion'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_reason-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.reason; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_reason-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.reason' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_reason-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.reason'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.ship_mode; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.ship_mode' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.ship_mode'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_store-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.store; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_store-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.store' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_store-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.store'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.store_returns; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.store_returns' 3 | ); 4 | -------------------------------------------------------------------------------- 
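Each optimize script above comes in three flavors keyed by table format, and the experiment configuration runs whichever matches the format under test: Delta Lake's OPTIMIZE command, Apache Hudi's run_clustering procedure, or Apache Iceberg's rewrite_data_files procedure. Filling in the placeholders with made-up names purely for illustration, the three variants for the store table would render roughly as:

-- Delta Lake (catalog and database names below are illustrative)
OPTIMIZE tpcds_db.store;
-- Apache Hudi
CALL spark_catalog.system.run_clustering(table => 'tpcds_db.store');
-- Apache Iceberg
CALL spark_catalog.system.rewrite_data_files('tpcds_db.store');

The optimize_split scripts that follow apply the same three-way split, but first run a *_SELECT.sql to enumerate the distinct date_sk values of the partitioned fact tables, presumably so the harness can substitute them into the IN predicates (for example ${cr_returned_date_sk}) and compact the table in slices.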
/core/run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.store_returns'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.store_sales; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.store_sales' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.store_sales'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.time_dim; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.time_dim' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.time_dim'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.warehouse; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.warehouse' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.warehouse'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.web_page; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.web_page' 3 | ); 4 | 
-------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.web_page'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.web_returns; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.web_returns' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.web_returns'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.web_sales; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.web_sales' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.web_sales'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.web_site; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.web_site' 3 | ); 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files('${database}.web_site'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.catalog_returns WHERE cr_returned_date_sk IN (${cr_returned_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-hudi.sql: 
-------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.catalog_returns', 3 | predicate => 'cr_returned_date_sk IN (${cr_returned_date_sk})' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.catalog_returns', where => 'cr_returned_date_sk IN (${cr_returned_date_sk})'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.catalog_returns WHERE cr_returned_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.catalog_returns', 3 | predicate => 'cr_returned_date_sk IS NULL' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.catalog_returns', where => 'cr_returned_date_sk IS NULL'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT cr_returned_date_sk AS cr_returned_date_sk 2 | FROM ${catalog}.${database}.catalog_returns 3 | WHERE cr_returned_date_sk IS NOT NULL 4 | ORDER BY cr_returned_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.catalog_sales WHERE cs_sold_date_sk IN (${cs_sold_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.catalog_sales', 3 | predicate => 'cs_sold_date_sk IN (${cs_sold_date_sk})' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.catalog_sales', where => 'cs_sold_date_sk IN (${cs_sold_date_sk})'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE 
${database}.catalog_sales WHERE cs_sold_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.catalog_sales', 3 | predicate => 'cs_sold_date_sk IS NULL' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.catalog_sales', where => 'cs_sold_date_sk IS NULL'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT cs_sold_date_sk AS cs_sold_date_sk 2 | FROM ${catalog}.${database}.catalog_sales 3 | WHERE cs_sold_date_sk IS NOT NULL 4 | ORDER BY cs_sold_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.inventory WHERE inv_date_sk IN (${inv_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.inventory', 3 | predicate => 'inv_date_sk IN (${inv_date_sk})' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.inventory', where => 'inv_date_sk IN (${inv_date_sk})'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.inventory WHERE inv_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.inventory', 3 | predicate => 'inv_date_sk IS NULL' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.inventory', where => 'inv_date_sk IS NULL'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_SELECT.sql: 
-------------------------------------------------------------------------------- 1 | SELECT DISTINCT inv_date_sk AS inv_date_sk 2 | FROM ${catalog}.${database}.inventory 3 | WHERE inv_date_sk IS NOT NULL 4 | ORDER BY inv_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.store_returns WHERE sr_returned_date_sk IN (${sr_returned_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.store_returns', 3 | predicate => 'sr_returned_date_sk IN (${sr_returned_date_sk})' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.store_returns', where => 'sr_returned_date_sk IN (${sr_returned_date_sk})'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.store_returns WHERE sr_returned_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.store_returns', 3 | predicate => 'sr_returned_date_sk IS NULL' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.store_returns', where => 'sr_returned_date_sk IS NULL'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT sr_returned_date_sk AS sr_returned_date_sk 2 | FROM ${catalog}.${database}.store_returns 3 | WHERE sr_returned_date_sk IS NOT NULL 4 | ORDER BY sr_returned_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.store_sales WHERE ss_sold_date_sk IN (${ss_sold_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.store_sales', 3 | 
predicate => 'ss_sold_date_sk IN (${ss_sold_date_sk})' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.store_sales', where => 'ss_sold_date_sk IN (${ss_sold_date_sk})'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.store_sales WHERE ss_sold_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.store_sales', 3 | predicate => 'ss_sold_date_sk IS NULL' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.store_sales', where => 'ss_sold_date_sk IS NULL'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ss_sold_date_sk AS ss_sold_date_sk 2 | FROM ${catalog}.${database}.store_sales 3 | WHERE ss_sold_date_sk IS NOT NULL 4 | ORDER BY ss_sold_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.web_returns WHERE wr_returned_date_sk IN (${wr_returned_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.web_returns', 3 | predicate => 'wr_returned_date_sk IN (${wr_returned_date_sk})' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.web_returns', where => 'wr_returned_date_sk IN (${wr_returned_date_sk})'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.web_returns WHERE wr_returned_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-hudi.sql: 
-------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.web_returns', 3 | predicate => 'wr_returned_date_sk IS NULL' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.web_returns', where => 'wr_returned_date_sk IS NULL'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT wr_returned_date_sk AS wr_returned_date_sk 2 | FROM ${catalog}.${database}.web_returns 3 | WHERE wr_returned_date_sk IS NOT NULL 4 | ORDER BY wr_returned_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.web_sales WHERE ws_sold_date_sk IN (${ws_sold_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.web_sales', 3 | predicate => 'ws_sold_date_sk IN (${ws_sold_date_sk})' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.web_sales', where => 'ws_sold_date_sk IN (${ws_sold_date_sk})'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-delta.sql: -------------------------------------------------------------------------------- 1 | OPTIMIZE ${database}.web_sales WHERE ws_sold_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-hudi.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.run_clustering( 2 | table => '${database}.web_sales', 3 | predicate => 'ws_sold_date_sk IS NULL' 4 | ); 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-iceberg.sql: -------------------------------------------------------------------------------- 1 | CALL ${catalog}.system.rewrite_data_files(table => '${database}.web_sales', where => 'ws_sold_date_sk IS NULL'); 2 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ws_sold_date_sk AS ws_sold_date_sk 2 | FROM ${catalog}.${database}.web_sales 3 | WHERE ws_sold_date_sk IS NOT NULL 4 | ORDER BY 
ws_sold_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/single_user/query22.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_product_name, 3 | i_brand, 4 | i_class, 5 | i_category, 6 | AVG( inv_quantity_on_hand ) qoh 7 | FROM 8 | ${catalog}.${database}.inventory ${asof}, 9 | ${catalog}.${database}.date_dim, 10 | ${catalog}.${database}.item 11 | WHERE 12 | inv_date_sk = d_date_sk 13 | AND inv_item_sk = i_item_sk 14 | AND d_month_seq BETWEEN 1201 AND 1201 + 11 15 | GROUP BY 16 | ROLLUP( 17 | i_product_name, 18 | i_brand, 19 | i_class, 20 | i_category 21 | ) 22 | ORDER BY 23 | qoh, 24 | i_product_name, 25 | i_brand, 26 | i_class, 27 | i_category LIMIT 100; 28 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/single_user/query3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_brand_id brand_id, 4 | item.i_brand brand, 5 | SUM( ss_net_profit ) sum_agg 6 | FROM 7 | ${catalog}.${database}.date_dim dt, 8 | ${catalog}.${database}.store_sales ${asof}, 9 | ${catalog}.${database}.item 10 | WHERE 11 | dt.d_date_sk = store_sales.ss_sold_date_sk 12 | AND store_sales.ss_item_sk = item.i_item_sk 13 | AND item.i_manufact_id = 445 14 | AND dt.d_moy = 12 15 | GROUP BY 16 | dt.d_year, 17 | item.i_brand, 18 | item.i_brand_id 19 | ORDER BY 20 | dt.d_year, 21 | sum_agg DESC, 22 | brand_id LIMIT 100; 23 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/single_user/query42.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_category_id, 4 | item.i_category, 5 | SUM( ss_ext_sales_price ) 6 | FROM 7 | ${catalog}.${database}.date_dim dt, 8 | ${catalog}.${database}.store_sales ${asof}, 9 | ${catalog}.${database}.item 10 | WHERE 11 | dt.d_date_sk = store_sales.ss_sold_date_sk 12 | AND store_sales.ss_item_sk = item.i_item_sk 13 | AND item.i_manager_id = 1 14 | AND dt.d_moy = 11 15 | AND dt.d_year = 1998 16 | GROUP BY 17 | dt.d_year, 18 | item.i_category_id, 19 | item.i_category 20 | ORDER BY 21 | SUM( ss_ext_sales_price ) DESC, 22 | dt.d_year, 23 | item.i_category_id, 24 | item.i_category LIMIT 100; 25 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/single_user/query52.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_brand_id brand_id, 4 | item.i_brand brand, 5 | SUM( ss_ext_sales_price ) ext_price 6 | FROM 7 | ${catalog}.${database}.date_dim dt, 8 | ${catalog}.${database}.store_sales ${asof}, 9 | ${catalog}.${database}.item 10 | WHERE 11 | dt.d_date_sk = store_sales.ss_sold_date_sk 12 | AND store_sales.ss_item_sk = item.i_item_sk 13 | AND item.i_manager_id = 1 14 | AND dt.d_moy = 11 15 | AND dt.d_year = 2000 16 | GROUP BY 17 | dt.d_year, 18 | item.i_brand, 19 | item.i_brand_id 20 | ORDER BY 21 | dt.d_year, 22 | ext_price DESC, 23 | brand_id LIMIT 100; 24 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/single_user/query55.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_brand_id brand_id, 3 | i_brand brand, 4 | SUM( ss_ext_sales_price ) 
ext_price 5 | FROM 6 | ${catalog}.${database}.date_dim, 7 | ${catalog}.${database}.store_sales ${asof}, 8 | ${catalog}.${database}.item 9 | WHERE 10 | d_date_sk = ss_sold_date_sk 11 | AND ss_item_sk = i_item_sk 12 | AND i_manager_id = 20 13 | AND d_moy = 12 14 | AND d_year = 1998 15 | GROUP BY 16 | i_brand, 17 | i_brand_id 18 | ORDER BY 19 | ext_price DESC, 20 | i_brand_id LIMIT 100; 21 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpcds/single_user/query96.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | COUNT(*) 3 | FROM 4 | ${catalog}.${database}.store_sales ${asof}, 5 | ${catalog}.${database}.household_demographics, 6 | ${catalog}.${database}.time_dim, 7 | ${catalog}.${database}.store 8 | WHERE 9 | ss_sold_time_sk = time_dim.t_time_sk 10 | AND ss_hdemo_sk = household_demographics.hd_demo_sk 11 | AND ss_store_sk = s_store_sk 12 | AND time_dim.t_hour = 8 13 | AND time_dim.t_minute >= 30 14 | AND household_demographics.hd_dep_count = 5 15 | AND store.s_store_name = 'ese' 16 | ORDER BY 17 | COUNT(*) LIMIT 100; 18 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/1_create_customer.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.customer( 4 | c_custkey BIGINT, 5 | c_name VARCHAR(25), 6 | c_address VARCHAR(40), 7 | c_nationkey BIGINT, 8 | c_phone CHAR(15), 9 | c_acctbal DECIMAL, 10 | c_comment VARCHAR(117), 11 | c_mktsegment CHAR(10) 12 | ) 13 | USING ${table_format} OPTIONS( 14 | PATH '${data_path}${experiment_start_time}/${repetition}/customer/' 15 | ) TBLPROPERTIES( 16 | 'primaryKey' = 'c_custkey' ${tblproperties_suffix} 17 | ); 18 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/1_create_nation.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.nation( 4 | n_nationkey BIGINT, 5 | n_name CHAR(25), 6 | n_regionkey BIGINT, 7 | n_comment VARCHAR(152) 8 | ) 9 | USING ${table_format} OPTIONS( 10 | PATH '${data_path}${experiment_start_time}/${repetition}/nation/' 11 | ) TBLPROPERTIES( 12 | 'primaryKey' = 'n_nationkey' ${tblproperties_suffix} 13 | ); 14 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/1_create_orders.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.orders( 4 | o_orderkey BIGINT, 5 | o_custkey BIGINT, 6 | o_orderstatus CHAR(1), 7 | o_totalprice DECIMAL, 8 | o_orderpriority CHAR(15), 9 | o_clerk CHAR(15), 10 | o_shippriority INT, 11 | o_comment VARCHAR(79), 12 | o_orderdate DATE 13 | ) 14 | USING ${table_format} OPTIONS( 15 | PATH '${data_path}${experiment_start_time}/${repetition}/orders/' 16 | ) TBLPROPERTIES( 17 | 'primaryKey' = 'o_orderkey' ${tblproperties_suffix} 18 | ); 19 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/1_create_part.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.part( 4 | p_partkey BIGINT, 5 | p_name VARCHAR(55), 6 | p_mfgr CHAR(25), 7 | p_type VARCHAR(25), 8 | p_size INT, 9 | p_container CHAR(10), 10 | 
p_retailprice DECIMAL, 11 | p_comment VARCHAR(23), 12 | p_brand CHAR(10) 13 | ) 14 | USING ${table_format} OPTIONS( 15 | PATH '${data_path}${experiment_start_time}/${repetition}/part/' 16 | ) TBLPROPERTIES( 17 | 'primaryKey' = 'p_partkey' ${tblproperties_suffix} 18 | ); 19 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/1_create_partsupp.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.partsupp( 4 | ps_partkey BIGINT, 5 | ps_suppkey BIGINT, 6 | ps_availqty INT, 7 | ps_supplycost DECIMAL, 8 | ps_comment VARCHAR(199) 9 | ) 10 | USING ${table_format} OPTIONS( 11 | PATH '${data_path}${experiment_start_time}/${repetition}/partsupp/' 12 | ) TBLPROPERTIES( 13 | 'primaryKey' = 'ps_partkey,ps_suppkey' ${tblproperties_suffix} 14 | ); 15 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/1_create_region.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.region( 4 | r_regionkey BIGINT, 5 | r_name CHAR(25), 6 | r_comment VARCHAR(152) 7 | ) 8 | USING ${table_format} OPTIONS( 9 | PATH '${data_path}${experiment_start_time}/${repetition}/region/' 10 | ) TBLPROPERTIES( 11 | 'primaryKey' = 'r_regionkey' ${tblproperties_suffix} 12 | ); 13 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/1_create_supplier.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | ${catalog}.${database}.supplier( 4 | s_suppkey BIGINT, 5 | s_name CHAR(25), 6 | s_address VARCHAR(40), 7 | s_nationkey BIGINT, 8 | s_phone CHAR(15), 9 | s_acctbal DECIMAL, 10 | s_comment VARCHAR(101) 11 | ) 12 | USING ${table_format} OPTIONS( 13 | PATH '${data_path}${experiment_start_time}/${repetition}/supplier/' 14 | ) TBLPROPERTIES( 15 | 'primaryKey' = 's_suppkey' ${tblproperties_suffix} 16 | ); 17 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/2_load_customer.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.customer 2 | SELECT * 3 | FROM ${external_catalog}.${external_database}.customer; 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/2_load_lineitem.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.lineitem 2 | SELECT * 3 | FROM ${external_catalog}.${external_database}.lineitem; 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/2_load_nation.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.nation 2 | SELECT * 3 | FROM ${external_catalog}.${external_database}.nation; 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/2_load_orders.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.orders 2 | SELECT * 3 | FROM ${external_catalog}.${external_database}.orders; 4 | 
-------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/2_load_part.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.part 2 | SELECT * 3 | FROM ${external_catalog}.${external_database}.part; 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/2_load_partsupp.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.partsupp 2 | SELECT * 3 | FROM ${external_catalog}.${external_database}.partsupp; 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/2_load_region.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.region 2 | SELECT * 3 | FROM ${external_catalog}.${external_database}.region; 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/2_load_supplier.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.supplier 2 | SELECT * 3 | FROM ${external_catalog}.${external_database}.supplier; 4 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/3_analyze_customer.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.customer COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/3_analyze_lineitem.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.lineitem COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/3_analyze_nation.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.nation COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/3_analyze_orders.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.orders COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/3_analyze_part.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.part COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/3_analyze_partsupp.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.partsupp COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/3_analyze_region.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | 
${catalog}.${database}.region COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/build/3_analyze_supplier.sql: -------------------------------------------------------------------------------- 1 | ANALYZE TABLE 2 | ${catalog}.${database}.supplier COMPUTE STATISTICS FOR ALL columns; 3 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/data_maintenance/RF2-merge.sql: -------------------------------------------------------------------------------- 1 | MERGE INTO 2 | ${catalog}.${database}.orders 3 | USING( 4 | SELECT 5 | dele_key 6 | FROM 7 | ${external_catalog}.${external_database}.s_delete_${stream_num} 8 | ) SOURCE ON 9 | o_orderkey = dele_key 10 | WHEN MATCHED THEN DELETE; 11 | 12 | MERGE INTO 13 | ${catalog}.${database}.lineitem 14 | USING( 15 | SELECT 16 | dele_key 17 | FROM 18 | ${external_catalog}.${external_database}.s_delete_${stream_num} 19 | ) SOURCE ON 20 | l_orderkey = dele_key 21 | WHEN MATCHED THEN DELETE; 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/data_maintenance/RF2.sql: -------------------------------------------------------------------------------- 1 | DELETE 2 | FROM 3 | ${catalog}.${database}.orders 4 | WHERE 5 | o_orderkey IN( 6 | SELECT 7 | dele_key 8 | FROM 9 | ${external_catalog}.${external_database}.s_delete_${stream_num} 10 | ); 11 | 12 | DELETE 13 | FROM 14 | ${catalog}.${database}.lineitem 15 | WHERE 16 | l_orderkey IN( 17 | SELECT 18 | dele_key 19 | FROM 20 | ${external_catalog}.${external_database}.s_delete_${stream_num} 21 | ); 22 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/init/init.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | SCHEMA IF NOT EXISTS ${catalog}.${database}; 3 | 4 | DROP 5 | TABLE 6 | IF EXISTS ${catalog}.${database}.customer; 7 | 8 | DROP 9 | TABLE 10 | IF EXISTS ${catalog}.${database}.lineitem; 11 | 12 | DROP 13 | TABLE 14 | IF EXISTS ${catalog}.${database}.nation; 15 | 16 | DROP 17 | TABLE 18 | IF EXISTS ${catalog}.${database}.orders; 19 | 20 | DROP 21 | TABLE 22 | IF EXISTS ${catalog}.${database}.part; 23 | 24 | DROP 25 | TABLE 26 | IF EXISTS ${catalog}.${database}.partsupp; 27 | 28 | DROP 29 | TABLE 30 | IF EXISTS ${catalog}.${database}.region; 31 | 32 | DROP 33 | TABLE 34 | IF EXISTS ${catalog}.${database}.supplier; 35 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query1.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | l_returnflag, 3 | l_linestatus, 4 | sum(l_quantity) as sum_qty, 5 | sum(l_extendedprice) as sum_base_price, 6 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 7 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 8 | avg(l_quantity) as avg_qty, 9 | avg(l_extendedprice) as avg_price, 10 | avg(l_discount) as avg_disc, 11 | count(*) as count_order 12 | FROM 13 | ${catalog}.${database}.lineitem 14 | WHERE 15 | l_shipdate <= date '1998-12-01' - interval '90' day 16 | GROUP BY 17 | l_returnflag, 18 | l_linestatus 19 | ORDER BY 20 | l_returnflag, 21 | l_linestatus; 22 | -------------------------------------------------------------------------------- 
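The numeric prefixes in the TPC-H build scripts above encode execution order: init.sql creates the schema if needed and drops any existing tables, the 1_create_* scripts define each table using the format chosen via ${table_format}, the 2_load_* scripts copy rows in from the external source tables, and the 3_analyze_* scripts compute column statistics. A rough sketch of how 1_create_nation.sql might render, assuming illustrative catalog, database, path, and format values and an empty ${tblproperties_suffix}:

-- Hypothetical rendering of 1_create_nation.sql; all names and the storage path are made up.
CREATE TABLE demo_catalog.tpch_db.nation(
  n_nationkey BIGINT,
  n_name CHAR(25),
  n_regionkey BIGINT,
  n_comment VARCHAR(152)
) USING delta OPTIONS(
  PATH 's3://demo-bucket/tpch/20240101T000000/0/nation/'
) TBLPROPERTIES(
  'primaryKey' = 'n_nationkey'
);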
/core/run/spark-3.3.1/scripts/tpch/single_user/query10.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_custkey, 3 | c_name, 4 | sum(l_extendedprice * (1 - l_discount)) as revenue, 5 | c_acctbal, 6 | n_name, 7 | c_address, 8 | c_phone, 9 | c_comment 10 | FROM 11 | ${catalog}.${database}.customer, 12 | ${catalog}.${database}.orders, 13 | ${catalog}.${database}.lineitem, 14 | ${catalog}.${database}.nation 15 | WHERE 16 | c_custkey = o_custkey 17 | and l_orderkey = o_orderkey 18 | and o_orderdate >= date '1993-10-01' 19 | and o_orderdate < date '1993-10-01' + interval '3' month 20 | and l_returnflag = 'R' 21 | and c_nationkey = n_nationkey 22 | GROUP BY 23 | c_custkey, 24 | c_name, 25 | c_acctbal, 26 | c_phone, 27 | n_name, 28 | c_address, 29 | c_comment 30 | ORDER BY 31 | revenue DESC 32 | LIMIT 20; 33 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query12.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | l_shipmode, 3 | sum(case 4 | when o_orderpriority ='1-URGENT' 5 | or o_orderpriority ='2-HIGH' 6 | then 1 7 | else 0 8 | end) as high_line_count, 9 | sum(case 10 | when o_orderpriority <> '1-URGENT' 11 | and o_orderpriority <> '2-HIGH' 12 | then 1 13 | else 0 14 | end) as low_line_count 15 | FROM 16 | ${catalog}.${database}.orders, 17 | ${catalog}.${database}.lineitem 18 | WHERE 19 | o_orderkey = l_orderkey 20 | and l_shipmode in ('MAIL', 'SHIP') 21 | and l_commitdate < l_receiptdate 22 | and l_shipdate < l_commitdate 23 | and l_receiptdate >= date '1994-01-01' 24 | and l_receiptdate < date '1994-01-01' + interval '1' year 25 | GROUP BY 26 | l_shipmode 27 | ORDER BY 28 | l_shipmode; 29 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query13.sql: -------------------------------------------------------------------------------- 1 | select 2 | c_count, 3 | count(*) as custdist 4 | from ( 5 | select 6 | c_custkey, 7 | count(o_orderkey) 8 | from 9 | ${catalog}.${database}.customer left outer join ${catalog}.${database}.orders 10 | on c_custkey = o_custkey 11 | and o_comment not like '%special%requests%' 12 | group by 13 | c_custkey 14 | ) as c_orders (c_custkey, c_count) 15 | group by 16 | c_count 17 | order by 18 | custdist desc, 19 | c_count desc; 20 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query14.sql: -------------------------------------------------------------------------------- 1 | select 2 | 100.00 * sum(case 3 | when p_type like 'PROMO%' 4 | then l_extendedprice*(1-l_discount) 5 | else 0 6 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 7 | from 8 | ${catalog}.${database}.lineitem, 9 | ${catalog}.${database}.part 10 | where 11 | l_partkey = p_partkey 12 | and l_shipdate >= date '1995-09-01' 13 | and l_shipdate < date '1995-09-01' + interval '1' month; 14 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query15.sql: -------------------------------------------------------------------------------- 1 | create or replace temporary view revenue 2 | (supplier_no, total_revenue) as 3 | select 4 | l_suppkey, 5 | sum(l_extendedprice * (1 - l_discount)) 6 | from 7 | ${catalog}.${database}.lineitem 8 | where 9 | l_shipdate >= date '1996-01-01' 10 | 
and l_shipdate < date '1996-01-01' + interval '3' month 11 | group by 12 | l_suppkey; 13 | 14 | select 15 | s_suppkey, 16 | s_name, 17 | s_address, 18 | s_phone, 19 | total_revenue 20 | from 21 | ${catalog}.${database}.supplier, 22 | revenue 23 | where 24 | s_suppkey = supplier_no 25 | and total_revenue = ( 26 | select 27 | max(total_revenue) 28 | from 29 | revenue 30 | ) 31 | order by 32 | s_suppkey; 33 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query16.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | p_brand, 3 | p_type, 4 | p_size, 5 | count(distinct ps_suppkey) as supplier_cnt 6 | FROM 7 | ${catalog}.${database}.partsupp, 8 | ${catalog}.${database}.part 9 | WHERE 10 | p_partkey = ps_partkey 11 | and p_brand <> 'Brand#45' 12 | and p_type not like 'MEDIUM POLISHED%' 13 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9) 14 | and ps_suppkey not in ( 15 | SELECT 16 | s_suppkey 17 | FROM 18 | ${catalog}.${database}.supplier 19 | WHERE 20 | s_comment like '%Customer%Complaints%' 21 | ) 22 | GROUP BY 23 | p_brand, 24 | p_type, 25 | p_size 26 | ORDER BY 27 | supplier_cnt desc, 28 | p_brand, 29 | p_type, 30 | p_size; 31 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query17.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l_extendedprice) / 7.0 as avg_yearly 3 | from 4 | ${catalog}.${database}.lineitem, 5 | ${catalog}.${database}.part 6 | where 7 | p_partkey = l_partkey 8 | and p_brand = 'Brand#23' 9 | and p_container = 'MED BOX' 10 | and l_quantity < ( 11 | select 12 | 0.2 * avg(l_quantity) 13 | from 14 | ${catalog}.${database}.lineitem 15 | where 16 | l_partkey = p_partkey 17 | ); 18 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query18.sql: -------------------------------------------------------------------------------- 1 | select 2 | c_name, 3 | c_custkey, 4 | o_orderkey, 5 | o_orderdate, 6 | o_totalprice, 7 | sum(l_quantity) 8 | from 9 | ${catalog}.${database}.customer, 10 | ${catalog}.${database}.orders, 11 | ${catalog}.${database}.lineitem 12 | where 13 | o_orderkey in ( 14 | select 15 | l_orderkey 16 | from 17 | ${catalog}.${database}.lineitem 18 | group by 19 | l_orderkey having 20 | sum(l_quantity) > 300 21 | ) 22 | and c_custkey = o_custkey 23 | and o_orderkey = l_orderkey 24 | group by 25 | c_name, 26 | c_custkey, 27 | o_orderkey, 28 | o_orderdate, 29 | o_totalprice 30 | order by 31 | o_totalprice desc, 32 | o_orderdate 33 | limit 100; 34 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | l_orderkey, sum(l_extendedprice*(1-l_discount)) as revenue, o_orderdate, o_shippriority 3 | FROM 4 | ${catalog}.${database}.customer, 5 | ${catalog}.${database}.orders, 6 | ${catalog}.${database}.lineitem 7 | WHERE 8 | c_mktsegment = 'BUILDING' 9 | and c_custkey = o_custkey 10 | and l_orderkey = o_orderkey 11 | and o_orderdate < date '1995-03-15' 12 | and l_shipdate > date '1995-03-15' 13 | GROUP BY 14 | l_orderkey, 15 | o_orderdate, 16 | o_shippriority 17 | ORDER BY 18 | revenue DESC, 19 | o_orderdate 20 | LIMIT 10; 21 | 
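For illustration: the ${catalog}, ${database}, ${external_catalog}, ${external_database}, and ${stream_num} variables in the Spark TPC-H templates above are placeholders that the benchmark fills in from its experiment configuration at run time. Assuming purely hypothetical values (spark_catalog, tpch_db, ext_catalog, ext_tpch_raw, and stream 0; none of these names come from this repository), data_maintenance/RF2.sql would render roughly as the following sketch:

-- Hypothetical rendering of RF2.sql; all identifiers below are illustrative only.
DELETE FROM spark_catalog.tpch_db.orders
WHERE o_orderkey IN (
  SELECT dele_key
  FROM ext_catalog.ext_tpch_raw.s_delete_0
);

DELETE FROM spark_catalog.tpch_db.lineitem
WHERE l_orderkey IN (
  SELECT dele_key
  FROM ext_catalog.ext_tpch_raw.s_delete_0
);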
-------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query4.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | o_orderpriority, 3 | count(*) as order_count 4 | FROM 5 | ${catalog}.${database}.orders 6 | WHERE 7 | o_orderdate >= date '1993-07-01' 8 | AND o_orderdate < date '1993-07-01' + interval '3' month 9 | AND EXISTS ( 10 | SELECT * 11 | FROM 12 | ${catalog}.${database}.lineitem 13 | WHERE 14 | l_orderkey = o_orderkey 15 | AND l_commitdate < l_receiptdate 16 | ) 17 | GROUP BY 18 | o_orderpriority 19 | ORDER BY 20 | o_orderpriority; 21 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query5.sql: -------------------------------------------------------------------------------- 1 | select 2 | n_name, 3 | sum(l_extendedprice * (1 - l_discount)) as revenue 4 | from 5 | ${catalog}.${database}.customer, 6 | ${catalog}.${database}.orders, 7 | ${catalog}.${database}.lineitem, 8 | ${catalog}.${database}.supplier, 9 | ${catalog}.${database}.nation, 10 | ${catalog}.${database}.region 11 | where 12 | c_custkey = o_custkey 13 | and l_orderkey = o_orderkey 14 | and l_suppkey = s_suppkey 15 | and c_nationkey = s_nationkey 16 | and s_nationkey = n_nationkey 17 | and n_regionkey = r_regionkey 18 | and r_name = 'ASIA' 19 | and o_orderdate >= date '1994-01-01' 20 | and o_orderdate < date '1994-01-01' + interval '1' year 21 | group by 22 | n_name 23 | order by 24 | revenue desc; 25 | -------------------------------------------------------------------------------- /core/run/spark-3.3.1/scripts/tpch/single_user/query6.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l_extendedprice * l_discount) as revenue 3 | from ${catalog}.${database}.lineitem 4 | where 5 | l_shipdate >= date '1994-01-01' 6 | and l_shipdate < date '1994-01-01' + interval '1' year 7 | and l_discount between 0.06 - 0.01 and 0.06 + 0.01 8 | and l_quantity < 24; 9 | -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/config/connections_config.yaml: -------------------------------------------------------------------------------- 1 | # Description: Connections Configuration 2 | --- 3 | version: 1 4 | connections: 5 | - id: trino_0 6 | driver: io.trino.jdbc.TrinoDriver 7 | url: jdbc:trino://${TRINO_MASTER_HOST}:8080 8 | username: admin 9 | password: '' 10 | -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/config/telemetry_config.yaml: -------------------------------------------------------------------------------- 1 | # Description: Telemetry Configuration 2 | --- 3 | version: 1 4 | connection: 5 | id: duckdb_0 6 | driver: org.duckdb.DuckDBDriver 7 | url: jdbc:duckdb:./telemetry-trino-420 8 | execute_ddl: true 9 | ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql' 10 | insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql' 11 | # The following parameter values will be used to replace the variables in the logging statements. 
12 | parameter_values: 13 | data_path: '' -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/coordinator-config.properties.template: -------------------------------------------------------------------------------- 1 | coordinator=true 2 | node-scheduler.include-coordinator=false 3 | http-server.http.port=8080 4 | discovery.uri=http://$TRINO_MASTER_HOST:8080 5 | query.max-memory=378GB -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/delta.properties.template: -------------------------------------------------------------------------------- 1 | connector.name=delta_lake 2 | hive.metastore.uri=thrift://${TRINO_MASTER_HOST}:9083 3 | hive.azure.abfs-storage-account=${DATA_STORAGE_ACCOUNT} 4 | hive.azure.abfs-access-key=${DATA_STORAGE_ACCOUNT_SHARED_KEY} 5 | delta.max-partitions-per-writer=2500 6 | delta.compression-codec=GZIP -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/dist-exec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${HOSTS}" ]; then 4 | echo "ERROR: HOSTS is not defined." 5 | exit 1 6 | fi 7 | 8 | if [ "$#" -lt 2 ]; then 9 | echo "Error: Please provide at least two input parameters." 10 | exit 1 11 | fi 12 | deploy_dir=$1 13 | script_file=$2 14 | 15 | for node in $HOSTS ; do ssh -t $node "mkdir -p ~/$deploy_dir" ; done 16 | for node in $HOSTS ; do scp *.template $node:~/$deploy_dir ; done 17 | for node in $HOSTS ; do scp $script_file $node:~/$deploy_dir ; done 18 | for node in $HOSTS ; do ssh -t $node "cd ~/$deploy_dir && chmod +x ./$script_file && ./$script_file ${@:3}" ; done 19 | -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/dist-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | if [ -z "${HOME}" ]; then 3 | echo "ERROR: HOME is not defined." 
4 | exit 1 5 | fi 6 | 7 | # Install packages 8 | sudo apt install -y net-tools nmap 9 | 10 | # Configure hosts 11 | my_ip=$(/sbin/ifconfig eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p') 12 | ip_range=${my_ip%.*}.* 13 | nmap -sn $ip_range | grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' | grep -v "^$my_ip$" > $HOME/hostiplist 14 | 15 | export HOSTS=$(<$HOME/hostiplist) 16 | 17 | for node in $HOSTS ; do scp ~/.ssh/id_rsa* $node:~/.ssh/ ; done 18 | 19 | # Push to environment 20 | echo "export HOSTS=\"${HOSTS}\"" >> env.sh 21 | echo "source $(pwd)/env.sh" >> ~/.bashrc 22 | -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/hive.properties.template: -------------------------------------------------------------------------------- 1 | connector.name=hive 2 | hive.metastore.uri=thrift://${TRINO_MASTER_HOST}:9083 3 | hive.allow-drop-table=true 4 | hive.azure.abfs-storage-account=${DATA_STORAGE_ACCOUNT} 5 | hive.azure.abfs-access-key=${DATA_STORAGE_ACCOUNT_SHARED_KEY} -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/iceberg.properties.template: -------------------------------------------------------------------------------- 1 | connector.name=iceberg 2 | hive.metastore.uri=thrift://${TRINO_MASTER_HOST}:9083 3 | hive.azure.abfs-storage-account=${DATA_STORAGE_ACCOUNT} 4 | hive.azure.abfs-access-key=${DATA_STORAGE_ACCOUNT_SHARED_KEY} 5 | iceberg.max-partitions-per-writer=2500 6 | iceberg.file-format=PARQUET 7 | iceberg.compression-codec=GZIP -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/jvm.config.template: -------------------------------------------------------------------------------- 1 | -server 2 | -Xmx54G 3 | -XX:InitialRAMPercentage=80 4 | -XX:MaxRAMPercentage=80 5 | -XX:G1HeapRegionSize=32M 6 | -XX:+ExplicitGCInvokesConcurrent 7 | -XX:+ExitOnOutOfMemoryError 8 | -XX:+HeapDumpOnOutOfMemoryError 9 | -XX:-OmitStackTraceInFastThrow 10 | -XX:ReservedCodeCacheSize=512M 11 | -XX:PerMethodRecompilationCutoff=10000 12 | -XX:PerBytecodeRecompilationCutoff=10000 13 | -Djdk.attach.allowAttachSelf=true 14 | -Djdk.nio.maxCachedBufferSize=2000000 15 | -XX:+UnlockDiagnosticVMOptions 16 | -XX:+UseAESCTRIntrinsics 17 | # Disable Preventive GC for performance reasons (JDK-8293861) 18 | -XX:-G1UsePreventiveGC -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/log.properties.template: -------------------------------------------------------------------------------- 1 | io.trino=INFO -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/node.properties.template: -------------------------------------------------------------------------------- 1 | node.environment=production 2 | node.id=$HOSTNAME 3 | node.data-dir=/mnt/local_resource/trino_data -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/start-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${HIVE_HOME}" ]; then 4 | echo "ERROR: HIVE_HOME is not defined." 5 | exit 1 6 | fi 7 | if [ -z "${TRINO_HOME}" ]; then 8 | echo "ERROR: TRINO_HOME is not defined." 9 | exit 1 10 | fi 11 | if [ -z "${HOSTS}" ]; then 12 | echo "ERROR: HOSTS is not defined." 
13 | exit 1 14 | fi 15 | 16 | echo "Starting HMS" 17 | cd $HIVE_HOME 18 | ./bin/hive --service metastore & 19 | 20 | echo "Starting Trino cluster" 21 | echo "Starting Trino coordinator" 22 | cd $TRINO_HOME 23 | ./bin/launcher start 24 | echo "Starting Trino workers" 25 | for node in $HOSTS ; do ssh -t $node "cd ${TRINO_HOME} && ./bin/launcher start" ; done 26 | -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/stop-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | source env.sh 3 | if [ -z "${HOSTS}" ]; then 4 | echo "ERROR: HOSTS is not defined." 5 | exit 1 6 | fi 7 | if [ -z "${TRINO_HOME}" ]; then 8 | echo "ERROR: TRINO_HOME is not defined." 9 | exit 1 10 | fi 11 | 12 | echo "Stopping Trino cluster" 13 | echo "Stopping Trino workers" 14 | for node in $HOSTS ; do ssh -t $node "cd ${TRINO_HOME} && ./bin/launcher stop" ; done 15 | echo "Stopping Trino coordinator" 16 | cd $TRINO_HOME 17 | ./bin/launcher stop 18 | 19 | echo "Stopping HMS" 20 | pkill -f "metastore" || true 21 | -------------------------------------------------------------------------------- /core/run/trino-420/azure-pipelines/sh/worker-config.properties.template: -------------------------------------------------------------------------------- 1 | coordinator=false 2 | http-server.http.port=8080 3 | discovery.uri=http://$TRINO_MASTER_HOST:8080 -------------------------------------------------------------------------------- /core/run/trino-420/results/trino-420-2024-02-01-8xStandard_E8s_v5.duckdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lst-bench/8e8c8592d4763c2dd58d7e28e78e6dd1dada0a8e/core/run/trino-420/results/trino-420-2024-02-01-8xStandard_E8s_v5.duckdb -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/1_create_catalog_page.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE ${catalog}.${database}.catalog_page( 2 | cp_catalog_page_sk int , 3 | cp_catalog_page_id varchar(16) , 4 | cp_start_date_sk int , 5 | cp_end_date_sk int , 6 | cp_department varchar(50) , 7 | cp_catalog_number int , 8 | cp_catalog_page_number int , 9 | cp_description varchar(100) , 10 | cp_type varchar(100) 11 | ) WITH (location='${data_path}${experiment_start_time}/${repetition}/catalog_page/' ${tblproperties_suffix}); 12 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/1_create_customer_demographics.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE ${catalog}.${database}.customer_demographics( 2 | cd_demo_sk int , 3 | cd_gender varchar(1) , 4 | cd_marital_status varchar(1) , 5 | cd_education_status varchar(20) , 6 | cd_purchase_estimate int , 7 | cd_credit_rating varchar(10) , 8 | cd_dep_count int , 9 | cd_dep_employed_count int , 10 | cd_dep_college_count int 11 | ) WITH (location='${data_path}${experiment_start_time}/${repetition}/customer_demographics/' ${tblproperties_suffix}); 12 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/1_create_household_demographics.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE ${catalog}.${database}.household_demographics( 2 | 
hd_demo_sk int , 3 | hd_income_band_sk int , 4 | hd_buy_potential varchar(15) , 5 | hd_dep_count int , 6 | hd_vehicle_count int 7 | ) WITH (location='${data_path}${experiment_start_time}/${repetition}/household_demographics/' ${tblproperties_suffix}); 8 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/1_create_income_band.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE ${catalog}.${database}.income_band( 2 | ib_income_band_sk int , 3 | ib_lower_bound int , 4 | ib_upper_bound int 5 | ) WITH (location='${data_path}${experiment_start_time}/${repetition}/income_band/' ${tblproperties_suffix}); 6 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/1_create_inventory.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE ${catalog}.${database}.inventory( 2 | inv_item_sk int , 3 | inv_warehouse_sk int , 4 | inv_quantity_on_hand int , 5 | inv_date_sk int 6 | ) WITH (location='${data_path}${experiment_start_time}/${repetition}/inventory/', ${partition_spec_keyword}=ARRAY['inv_date_sk'] ${tblproperties_suffix}); 7 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/1_create_reason.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE ${catalog}.${database}.reason( 2 | r_reason_sk int , 3 | r_reason_id varchar(16) , 4 | r_reason_desc varchar(100) 5 | ) WITH (location='${data_path}${experiment_start_time}/${repetition}/reason/' ${tblproperties_suffix}); 6 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/1_create_ship_mode.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE ${catalog}.${database}.ship_mode( 2 | sm_ship_mode_sk int , 3 | sm_ship_mode_id varchar(16) , 4 | sm_type varchar(30) , 5 | sm_code varchar(10) , 6 | sm_carrier varchar(20) , 7 | sm_contract varchar(20) 8 | ) WITH (location='${data_path}${experiment_start_time}/${repetition}/ship_mode/' ${tblproperties_suffix}); 9 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/1_create_time_dim.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE ${catalog}.${database}.time_dim( 2 | t_time_sk int , 3 | t_time_id varchar(16) , 4 | t_time int , 5 | t_hour int , 6 | t_minute int , 7 | t_second int , 8 | t_am_pm varchar(2) , 9 | t_shift varchar(20) , 10 | t_sub_shift varchar(20) , 11 | t_meal_time varchar(20) 12 | ) WITH (location='${data_path}${experiment_start_time}/${repetition}/time_dim/' ${tblproperties_suffix}); 13 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_call_center.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.call_center SELECT * FROM ${external_catalog}.${external_database}.call_center; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_catalog_page.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.catalog_page SELECT 
* FROM ${external_catalog}.${external_database}.catalog_page; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_catalog_returns.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.catalog_returns SELECT * FROM ${external_catalog}.${external_database}.catalog_returns; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_catalog_sales.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.catalog_sales SELECT * FROM ${external_catalog}.${external_database}.catalog_sales; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_customer.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.customer SELECT * FROM ${external_catalog}.${external_database}.customer; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_customer_address.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.customer_address SELECT * FROM ${external_catalog}.${external_database}.customer_address; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_customer_demographics.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.customer_demographics SELECT * FROM ${external_catalog}.${external_database}.customer_demographics; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_date_dim.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.date_dim SELECT * FROM ${external_catalog}.${external_database}.date_dim; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_household_demographics.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.household_demographics SELECT * FROM ${external_catalog}.${external_database}.household_demographics; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_income_band.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.income_band SELECT * FROM ${external_catalog}.${external_database}.income_band; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_inventory.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.inventory SELECT * FROM ${external_catalog}.${external_database}.inventory; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_item.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.item SELECT * 
FROM ${external_catalog}.${external_database}.item; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_promotion.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.promotion SELECT * FROM ${external_catalog}.${external_database}.promotion; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_reason.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.reason SELECT * FROM ${external_catalog}.${external_database}.reason; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_ship_mode.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.ship_mode SELECT * FROM ${external_catalog}.${external_database}.ship_mode; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_store.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.store SELECT * FROM ${external_catalog}.${external_database}.store; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_store_returns.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.store_returns SELECT * FROM ${external_catalog}.${external_database}.store_returns; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_store_sales.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.store_sales SELECT * FROM ${external_catalog}.${external_database}.store_sales; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_time_dim.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.time_dim SELECT * FROM ${external_catalog}.${external_database}.time_dim; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_warehouse.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.warehouse SELECT * FROM ${external_catalog}.${external_database}.warehouse; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_web_page.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.web_page SELECT * FROM ${external_catalog}.${external_database}.web_page; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_web_returns.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.web_returns SELECT * FROM ${external_catalog}.${external_database}.web_returns; 2 | 
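As a sketch of how the Trino TPC-DS build templates above expand, 1_create_inventory.sql leaves both the partitioning keyword and a trailing table-properties suffix as variables (${partition_spec_keyword} and ${tblproperties_suffix}) so that one script can target different connectors. Assuming an Iceberg catalog, where the partitioning property is named partitioning and the suffix might add a file format, the statement could expand roughly as shown below; the catalog, schema, storage path, and property values are illustrative assumptions, not values taken from this repository's configuration files:

-- Hypothetical expansion of 1_create_inventory.sql for a Trino Iceberg catalog.
-- 'iceberg', 'tpcds', and the storage location below are made-up illustrative values.
CREATE TABLE iceberg.tpcds.inventory(
   inv_item_sk int ,
   inv_warehouse_sk int ,
   inv_quantity_on_hand int ,
   inv_date_sk int
) WITH (
   location='abfss://data@account.dfs.core.windows.net/run1/0/inventory/',
   partitioning=ARRAY['inv_date_sk'],
   format='PARQUET'
);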
-------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_web_sales.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.web_sales SELECT * FROM ${external_catalog}.${external_database}.web_sales; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/2_load_web_site.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO ${catalog}.${database}.web_site SELECT * FROM ${external_catalog}.${external_database}.web_site; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_call_center.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.call_center; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_catalog_page.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.catalog_page; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_catalog_returns.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.catalog_returns; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_catalog_sales.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.catalog_sales; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_customer.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.customer; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_customer_address.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.customer_address; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_customer_demographics.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.customer_demographics; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_date_dim.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.date_dim; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_household_demographics.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.household_demographics; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_income_band.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.income_band; 2 | 
-------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_inventory.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.inventory; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_item.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.item; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_promotion.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.promotion; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_reason.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.reason; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_ship_mode.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.ship_mode; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_store.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.store; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_store_returns.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.store_returns; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_store_sales.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.store_sales; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_time_dim.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.time_dim; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_warehouse.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.warehouse; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_web_page.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.web_page; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_web_returns.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.web_returns; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_web_sales.sql: -------------------------------------------------------------------------------- 1 | ANALYZE 
${catalog}.${database}.web_sales; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/build/3_analyze_web_site.sql: -------------------------------------------------------------------------------- 1 | ANALYZE ${catalog}.${database}.web_site; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_call_center.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.call_center EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_catalog_page.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.catalog_page EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_catalog_returns.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.catalog_returns EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_catalog_sales.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.catalog_sales EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_customer.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.customer EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_customer_address.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.customer_address EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_customer_demographics.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.customer_demographics EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_date_dim.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.date_dim EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_household_demographics.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.household_demographics EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_income_band.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.income_band EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_inventory.sql: 
-------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.inventory EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_item.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.item EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_promotion.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.promotion EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_reason.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.reason EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_ship_mode.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.ship_mode EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_store.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.store EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_store_returns.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.store_returns EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_store_sales.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.store_sales EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_time_dim.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.time_dim EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_warehouse.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.warehouse EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_web_page.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.web_page EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_web_returns.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.web_returns EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_web_sales.sql: 
-------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.web_sales EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize/o_web_site.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.web_site EXECUTE optimize; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_catalog_returns_IN.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.catalog_returns EXECUTE optimize WHERE cr_returned_date_sk IN (${cr_returned_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_catalog_returns_NULL.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.catalog_returns EXECUTE optimize WHERE cr_returned_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT cr_returned_date_sk AS cr_returned_date_sk 2 | FROM ${catalog}.${database}.catalog_returns 3 | WHERE cr_returned_date_sk IS NOT NULL 4 | ORDER BY cr_returned_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_catalog_sales_IN.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.catalog_sales EXECUTE optimize WHERE cs_sold_date_sk IN (${cs_sold_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_catalog_sales_NULL.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.catalog_sales EXECUTE optimize WHERE cs_sold_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_catalog_sales_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT cs_sold_date_sk AS cs_sold_date_sk 2 | FROM ${catalog}.${database}.catalog_sales 3 | WHERE cs_sold_date_sk IS NOT NULL 4 | ORDER BY cs_sold_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_inventory_IN.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.inventory EXECUTE optimize WHERE inv_date_sk IN (${inv_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_inventory_NULL.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.inventory EXECUTE optimize WHERE inv_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_inventory_SELECT.sql: 
-------------------------------------------------------------------------------- 1 | SELECT DISTINCT inv_date_sk AS inv_date_sk 2 | FROM ${catalog}.${database}.inventory 3 | WHERE inv_date_sk IS NOT NULL 4 | ORDER BY inv_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_store_returns_IN.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.store_returns EXECUTE optimize WHERE sr_returned_date_sk IN (${sr_returned_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_store_returns_NULL.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.store_returns EXECUTE optimize WHERE sr_returned_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_store_returns_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT sr_returned_date_sk AS sr_returned_date_sk 2 | FROM ${catalog}.${database}.store_returns 3 | WHERE sr_returned_date_sk IS NOT NULL 4 | ORDER BY sr_returned_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_store_sales_IN.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.store_sales EXECUTE optimize WHERE ss_sold_date_sk IN (${ss_sold_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_store_sales_NULL.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.store_sales EXECUTE optimize WHERE ss_sold_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_store_sales_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ss_sold_date_sk AS ss_sold_date_sk 2 | FROM ${catalog}.${database}.store_sales 3 | WHERE ss_sold_date_sk IS NOT NULL 4 | ORDER BY ss_sold_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_web_returns_IN.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.web_returns EXECUTE optimize WHERE wr_returned_date_sk IN (${wr_returned_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_web_returns_NULL.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.web_returns EXECUTE optimize WHERE wr_returned_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_web_returns_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT wr_returned_date_sk AS wr_returned_date_sk 2 | FROM ${catalog}.${database}.web_returns 3 | WHERE 
wr_returned_date_sk IS NOT NULL 4 | ORDER BY wr_returned_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_web_sales_IN.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.web_sales EXECUTE optimize WHERE ws_sold_date_sk IN (${ws_sold_date_sk}); 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_web_sales_NULL.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE ${catalog}.${database}.web_sales EXECUTE optimize WHERE ws_sold_date_sk IS NULL; 2 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/optimize_split/o_web_sales_SELECT.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ws_sold_date_sk AS ws_sold_date_sk 2 | FROM ${catalog}.${database}.web_sales 3 | WHERE ws_sold_date_sk IS NOT NULL 4 | ORDER BY ws_sold_date_sk ASC; 5 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query15.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "ca_zip" 3 | , "sum"("cs_sales_price") 4 | FROM 5 | ${catalog}.${database}.catalog_sales 6 | , ${catalog}.${database}.customer 7 | , ${catalog}.${database}.customer_address 8 | , ${catalog}.${database}.date_dim 9 | WHERE ("cs_bill_customer_sk" = "c_customer_sk") 10 | AND ("c_current_addr_sk" = "ca_address_sk") 11 | AND (("substr"("ca_zip", 1, 5) IN ('85669', '86197','88274','83405','86475', 12 | '85392', '85460', '80348', '81792')) 13 | OR ("ca_state" IN ('CA','WA','GA')) 14 | OR ("cs_sales_price" > 500)) 15 | AND ("cs_sold_date_sk" = "d_date_sk") 16 | AND ("d_qoy" = 2) 17 | AND ("d_year" = 2002) 18 | GROUP BY "ca_zip" 19 | ORDER BY "ca_zip" ASC 20 | LIMIT 100; 21 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query22.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "i_product_name" 3 | , "i_brand" 4 | , "i_class" 5 | , "i_category" 6 | , "avg"("inv_quantity_on_hand") "qoh" 7 | FROM 8 | ${catalog}.${database}.inventory 9 | , ${catalog}.${database}.date_dim 10 | , ${catalog}.${database}.item 11 | WHERE ("inv_date_sk" = "d_date_sk") 12 | AND ("inv_item_sk" = "i_item_sk") 13 | AND ("d_month_seq" BETWEEN 1201 AND (1201 + 11)) 14 | GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category) 15 | ORDER BY "qoh" ASC, "i_product_name" ASC, "i_brand" ASC, "i_class" ASC, "i_category" ASC 16 | LIMIT 100; 17 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "dt"."d_year" 3 | , "item"."i_brand_id" "brand_id" 4 | , "item"."i_brand" "brand" 5 | , "sum"("ss_ext_sales_price") "sum_agg" 6 | FROM 7 | ${catalog}.${database}.date_dim dt 8 | , ${catalog}.${database}.store_sales 9 | , ${catalog}.${database}.item 10 | WHERE ("dt"."d_date_sk" = "store_sales"."ss_sold_date_sk") 11 | AND ("store_sales"."ss_item_sk" = "item"."i_item_sk") 12 | AND ("item"."i_manufact_id" = 445) 13 | AND ("dt"."d_moy" = 12) 14 | GROUP BY 
"dt"."d_year", "item"."i_brand", "item"."i_brand_id" 15 | ORDER BY "dt"."d_year" ASC, "sum_agg" DESC, "brand_id" ASC 16 | LIMIT 100; 17 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query37.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "i_item_id" 3 | , "i_item_desc" 4 | , "i_current_price" 5 | FROM 6 | ${catalog}.${database}.item 7 | , ${catalog}.${database}.inventory 8 | , ${catalog}.${database}.date_dim 9 | , ${catalog}.${database}.catalog_sales 10 | WHERE ("i_current_price" BETWEEN 26 AND (26 + 30)) 11 | AND ("inv_item_sk" = "i_item_sk") 12 | AND ("d_date_sk" = "inv_date_sk") 13 | AND (CAST("d_date" AS DATE) BETWEEN CAST('2001-06-09' AS DATE) AND (CAST('2001-06-09' AS DATE) + INTERVAL '60' DAY)) 14 | AND ("i_manufact_id" IN (744,884,722,693)) 15 | AND ("inv_quantity_on_hand" BETWEEN 100 AND 500) 16 | AND ("cs_item_sk" = "i_item_sk") 17 | GROUP BY "i_item_id", "i_item_desc", "i_current_price" 18 | ORDER BY "i_item_id" ASC 19 | LIMIT 100; 20 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query42.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "dt"."d_year" 3 | , "item"."i_category_id" 4 | , "item"."i_category" 5 | , "sum"("ss_ext_sales_price") 6 | FROM 7 | ${catalog}.${database}.date_dim dt 8 | , ${catalog}.${database}.store_sales 9 | , ${catalog}.${database}.item 10 | WHERE ("dt"."d_date_sk" = "store_sales"."ss_sold_date_sk") 11 | AND ("store_sales"."ss_item_sk" = "item"."i_item_sk") 12 | AND ("item"."i_manager_id" = 1) 13 | AND ("dt"."d_moy" = 11) 14 | AND ("dt"."d_year" = 1998) 15 | GROUP BY "dt"."d_year", "item"."i_category_id", "item"."i_category" 16 | ORDER BY "sum"("ss_ext_sales_price") DESC, "dt"."d_year" ASC, "item"."i_category_id" ASC, "item"."i_category" ASC 17 | LIMIT 100; 18 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query52.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "dt"."d_year" 3 | , "item"."i_brand_id" "brand_id" 4 | , "item"."i_brand" "brand" 5 | , "sum"("ss_ext_sales_price") "ext_price" 6 | FROM 7 | ${catalog}.${database}.date_dim dt 8 | , ${catalog}.${database}.store_sales 9 | , ${catalog}.${database}.item 10 | WHERE ("dt"."d_date_sk" = "store_sales"."ss_sold_date_sk") 11 | AND ("store_sales"."ss_item_sk" = "item"."i_item_sk") 12 | AND ("item"."i_manager_id" = 1) 13 | AND ("dt"."d_moy" = 11) 14 | AND ("dt"."d_year" = 2000) 15 | GROUP BY "dt"."d_year", "item"."i_brand", "item"."i_brand_id" 16 | ORDER BY "dt"."d_year" ASC, "ext_price" DESC, "brand_id" ASC 17 | LIMIT 100; 18 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query55.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "i_brand_id" "brand_id" 3 | , "i_brand" "brand" 4 | , "sum"("ss_ext_sales_price") "ext_price" 5 | FROM 6 | ${catalog}.${database}.date_dim 7 | , ${catalog}.${database}.store_sales 8 | , ${catalog}.${database}.item 9 | WHERE ("d_date_sk" = "ss_sold_date_sk") 10 | AND ("ss_item_sk" = "i_item_sk") 11 | AND ("i_manager_id" = 20) 12 | AND ("d_moy" = 12) 13 | AND ("d_year" = 1998) 14 | GROUP BY "i_brand", "i_brand_id" 15 | ORDER BY "ext_price" DESC, "i_brand_id" 
ASC 16 | LIMIT 100; 17 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query82.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "i_item_id" 3 | , "i_item_desc" 4 | , "i_current_price" 5 | FROM 6 | ${catalog}.${database}.item 7 | , ${catalog}.${database}.inventory 8 | , ${catalog}.${database}.date_dim 9 | , ${catalog}.${database}.store_sales 10 | WHERE ("i_current_price" BETWEEN 69 AND (69 + 30)) 11 | AND ("inv_item_sk" = "i_item_sk") 12 | AND ("d_date_sk" = "inv_date_sk") 13 | AND (CAST("d_date" AS DATE) BETWEEN CAST('1998-06-06' AS DATE) AND (CAST('1998-06-06' AS DATE) + INTERVAL '60' DAY)) 14 | AND ("i_manufact_id" IN (105,513,180,137)) 15 | AND ("inv_quantity_on_hand" BETWEEN 100 AND 500) 16 | AND ("ss_item_sk" = "i_item_sk") 17 | GROUP BY "i_item_id", "i_item_desc", "i_current_price" 18 | ORDER BY "i_item_id" ASC 19 | LIMIT 100; 20 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query84.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "c_customer_id" "customer_id" 3 | , "concat"("concat"("c_last_name", ', '), "c_first_name") "customername" 4 | FROM 5 | ${catalog}.${database}.customer 6 | , ${catalog}.${database}.customer_address 7 | , ${catalog}.${database}.customer_demographics 8 | , ${catalog}.${database}.household_demographics 9 | , ${catalog}.${database}.income_band 10 | , ${catalog}.${database}.store_returns 11 | WHERE ("ca_city" = 'White Oak') 12 | AND ("c_current_addr_sk" = "ca_address_sk") 13 | AND ("ib_lower_bound" >= 45626) 14 | AND ("ib_upper_bound" <= (45626 + 50000)) 15 | AND ("ib_income_band_sk" = "hd_income_band_sk") 16 | AND ("cd_demo_sk" = "c_current_cdemo_sk") 17 | AND ("hd_demo_sk" = "c_current_hdemo_sk") 18 | AND ("sr_cdemo_sk" = "cd_demo_sk") 19 | ORDER BY "c_customer_id" ASC 20 | LIMIT 100; 21 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query86.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "sum"("ws_net_paid") "total_sum" 3 | , "i_category" 4 | , "i_class" 5 | , (GROUPING ("i_category") + GROUPING ("i_class")) "lochierarchy" 6 | , "rank"() OVER (PARTITION BY (GROUPING ("i_category") + GROUPING ("i_class")), (CASE WHEN (GROUPING ("i_class") = 0) THEN "i_category" END) ORDER BY "sum"("ws_net_paid") DESC) "rank_within_parent" 7 | FROM 8 | ${catalog}.${database}.web_sales 9 | , ${catalog}.${database}.date_dim d1 10 | , ${catalog}.${database}.item 11 | WHERE ("d1"."d_month_seq" BETWEEN 1205 AND (1205 + 11)) 12 | AND ("d1"."d_date_sk" = "ws_sold_date_sk") 13 | AND ("i_item_sk" = "ws_item_sk") 14 | GROUP BY ROLLUP (i_category, i_class) 15 | ORDER BY "lochierarchy" DESC, (CASE WHEN ("lochierarchy" = 0) THEN "i_category" END) ASC, "rank_within_parent" ASC 16 | LIMIT 100; 17 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query93.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "ss_customer_sk" 3 | , "sum"("act_sales") "sumsales" 4 | FROM 5 | ( 6 | SELECT 7 | "ss_item_sk" 8 | , "ss_ticket_number" 9 | , "ss_customer_sk" 10 | , (CASE WHEN ("sr_return_quantity" IS NOT NULL) THEN (("ss_quantity" - "sr_return_quantity") * "ss_sales_price") ELSE 
("ss_quantity" * "ss_sales_price") END) "act_sales" 11 | FROM 12 | (${catalog}.${database}.store_sales 13 | LEFT JOIN ${catalog}.${database}.store_returns ON ("sr_item_sk" = "ss_item_sk") 14 | AND ("sr_ticket_number" = "ss_ticket_number")) 15 | , ${catalog}.${database}.reason 16 | WHERE ("sr_reason_sk" = "r_reason_sk") 17 | AND ("r_reason_desc" = 'Did not get it on time') 18 | ) t 19 | GROUP BY "ss_customer_sk" 20 | ORDER BY "sumsales" ASC, "ss_customer_sk" ASC 21 | LIMIT 100; 22 | -------------------------------------------------------------------------------- /core/run/trino-420/scripts/tpcds/single_user/query96.sql: -------------------------------------------------------------------------------- 1 | SELECT "count"(*) 2 | FROM 3 | ${catalog}.${database}.store_sales 4 | , ${catalog}.${database}.household_demographics 5 | , ${catalog}.${database}.time_dim 6 | , ${catalog}.${database}.store 7 | WHERE ("ss_sold_time_sk" = "time_dim"."t_time_sk") 8 | AND ("ss_hdemo_sk" = "household_demographics"."hd_demo_sk") 9 | AND ("ss_store_sk" = "s_store_sk") 10 | AND ("time_dim"."t_hour" = 8) 11 | AND ("time_dim"."t_minute" >= 30) 12 | AND ("household_demographics"."hd_dep_count" = 5) 13 | AND ("store"."s_store_name" = 'ese') 14 | ORDER BY "count"(*) ASC 15 | LIMIT 100; 16 | -------------------------------------------------------------------------------- /core/src/main/resources/config/spark/sample_connections_config.yaml: -------------------------------------------------------------------------------- 1 | # Description: Connections Configuration 2 | --- 3 | version: 1 4 | connections: 5 | - id: spark_0 6 | driver: org.apache.hive.jdbc.HiveDriver 7 | url: jdbc:hive2://127.0.0.1:10000 8 | max_num_retries: 3 9 | show_warnings: true 10 | username: ${DATABASE_USER:-spark_admin} 11 | password: ${DATABASE_PASSWORD} 12 | - id: spark_1 13 | type: jdbc 14 | driver: org.apache.hive.jdbc.HiveDriver 15 | url: jdbc:hive2://127.0.0.1:10001 16 | username: admin 17 | password: p@ssw0rd1 18 | - id: spark_2 19 | type: spark 20 | url: spark://127.0.0.1:7077 21 | config: 22 | spark.worker.timeout: "60" 23 | -------------------------------------------------------------------------------- /core/src/main/resources/config/spark/sample_telemetry_config.yaml: -------------------------------------------------------------------------------- 1 | # Description: Telemetry Configuration 2 | --- 3 | version: 1 4 | connection: 5 | id: duckdb_0 6 | driver: org.duckdb.DuckDBDriver 7 | url: jdbc:duckdb:./telemetry 8 | execute_ddl: true 9 | ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql' 10 | insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql' 11 | # The following parameter values will be used to replace the variables in the logging statements. 
12 | parameter_values: 13 | data_path: '' 14 | -------------------------------------------------------------------------------- /core/src/main/resources/config/trino/sample_connections_config.yaml: -------------------------------------------------------------------------------- 1 | # Description: Connections Configuration 2 | --- 3 | version: 1 4 | connections: 5 | - id: trino_0 6 | driver: io.trino.jdbc.TrinoDriver 7 | url: jdbc:trino://127.0.0.1:8081 8 | username: admin 9 | password: p@ssw0rd0 10 | - id: trino_1 11 | driver: io.trino.jdbc.TrinoDriver 12 | url: jdbc:trino://127.0.0.1:8081 13 | username: admin 14 | password: p@ssw0rd1 15 | -------------------------------------------------------------------------------- /core/src/main/resources/config/trino/sample_telemetry_config.yaml: -------------------------------------------------------------------------------- 1 | # Description: Telemetry Configuration 2 | --- 3 | version: 1 4 | connection: 5 | id: duckdb_0 6 | driver: org.duckdb.DuckDBDriver 7 | url: jdbc:duckdb:./telemetry 8 | execute_ddl: true 9 | ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql' 10 | insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql' 11 | # The following parameter values will be used to replace the variables in the logging statements. 12 | parameter_values: 13 | data_path: '' 14 | -------------------------------------------------------------------------------- /core/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- (Log4j 2 XML configuration; the markup was not captured in this dump.) -------------------------------------------------------------------------------- /core/src/main/resources/schemas/workload.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "type": "object", 4 | "title": "Schema for workload definition file", 5 | "required": [ "version", "id", "phases" ], 6 | "properties": { 7 | "version": { 8 | "type": "integer", 9 | "title": "File format version" 10 | }, 11 | "id": { 12 | "type": "string", 13 | "title": "Identifier for the workload" 14 | }, 15 | "phases": { 16 | "type": "array", 17 | "title": "List of phases", 18 | "items": { 19 | "$ref": "resource:/schemas/instance.json#/$defs/phase" 20 | } 21 | } 22 | }, 23 | "additionalProperties": false 24 | } -------------------------------------------------------------------------------- /core/src/main/resources/scripts/logging/duckdb/ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | TABLE 3 | IF NOT EXISTS experiment_telemetry( 4 | run_id STRING, 5 | event_start_time STRING, 6 | event_end_time STRING, 7 | event_id STRING, 8 | event_type STRING, 9 | event_status STRING, 10 | event_data STRING 11 | ); 12 | -------------------------------------------------------------------------------- /core/src/main/resources/scripts/logging/duckdb/insert.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | experiment_telemetry 4 | VALUES ${tuples}; 5 | 6 | --COPY experiment_telemetry TO '${data_path}experiment/experiment_telemetry.csv'( 7 | -- ESCAPE '\', 8 | -- HEADER 9 | --); 10 | 11 | -------------------------------------------------------------------------------- /core/src/main/resources/scripts/logging/spark/ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE 2 | DATABASE IF
NOT EXISTS ${catalog}.${database}; 3 | 4 | CREATE 5 | TABLE 6 | IF NOT EXISTS ${catalog}.${database}.experiment_telemetry( 7 | event_start_time STRING, 8 | event_end_time STRING, 9 | event_id STRING, 10 | event_type STRING, 11 | event_status STRING, 12 | event_data STRING 13 | ) 14 | USING csv OPTIONS( 15 | PATH '${path}experiment/' 16 | ); 17 | -------------------------------------------------------------------------------- /core/src/main/resources/scripts/logging/spark/insert.sql: -------------------------------------------------------------------------------- 1 | INSERT 2 | INTO 3 | ${catalog}.${database}.experiment_telemetry 4 | VALUES ${tuples}; 5 | -------------------------------------------------------------------------------- /core/src/test/resources/config/samples/connections_config_test0.yaml: -------------------------------------------------------------------------------- 1 | # Description: Connections Configuration 2 | --- 3 | version: 1 4 | connections: 5 | - id: spark_0 6 | driver: org.apache.hive.jdbc.HiveDriver 7 | url: jdbc:hive2://127.0.0.1:10000 8 | username: admin 9 | password: p@ssw0rd0 10 | - id: spark_1 11 | type: jdbc 12 | driver: org.apache.hive.jdbc.HiveDriver 13 | url: jdbc:hive2://127.0.0.1:10001 14 | - id: spark_2 15 | type: spark 16 | url: spark://127.0.0.1:7077 17 | - id: spark_3 18 | type: spark 19 | url: spark://127.0.0.1:7078 20 | config: 21 | spark.worker.timeout: "60" 22 | -------------------------------------------------------------------------------- /core/src/test/resources/config/samples/incorrect_telemetry_config_test0.yaml: -------------------------------------------------------------------------------- 1 | # Description: Telemetry Configuration 2 | --- 3 | version: 1 4 | connection: 5 | id: sqlite_0 6 | driver: org.sqlite.JDBC 7 | url: jdbc:sqlite:create_drop-telemetry.db 8 | non_existing_property: 0 9 | execute_ddl: true 10 | ddl_file: ../logging/ddl.sql 11 | insert_file: ../logging/insert.sql 12 | parameter_values: 13 | data_path: '' 14 | -------------------------------------------------------------------------------- /core/src/test/resources/config/samples/incorrect_telemetry_config_test1.yaml: -------------------------------------------------------------------------------- 1 | # Description: Telemetry Configuration 2 | --- 3 | version: 1 4 | connection: 5 | id: sqlite_0 6 | driver: org.sqlite.JDBC 7 | execute_ddl: true 8 | ddl_file: ../logging/ddl.sql 9 | insert_file: ../logging/insert.sql 10 | parameter_values: 11 | data_path: '' 12 | -------------------------------------------------------------------------------- /core/src/test/resources/config/samples/library_retry.yaml: -------------------------------------------------------------------------------- 1 | # Description: Tasks Library 2 | --- 3 | version: 1 4 | task_templates: 5 | - id: retry_query 6 | files: 7 | - src/test/resources/scripts/retry_test_query.sql 8 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/jdbc_connection_config.yaml: -------------------------------------------------------------------------------- 1 | # Description: Connections Configuration 2 | --- 3 | version: 1 4 | connections: 5 | - id: spark_0 6 | driver: org.apache.hive.jdbc.HiveDriver 7 | url: jdbc:hive2://127.0.0.1:10000 8 | username: admin 9 | password: p@ssw0rd0 10 | - id: spark_1 11 | driver: org.apache.hive.jdbc.HiveDriver 12 | url: jdbc:hive2://127.0.0.1:10000 13 | username: admin 14 | password: p@ssw0rd0 15 | 
-------------------------------------------------------------------------------- /core/src/test/resources/config/spark/spark_connection_config-delta.yaml: -------------------------------------------------------------------------------- 1 | # Description: Connections Configuration 2 | --- 3 | version: 1 4 | connections: 5 | - id: spark_0 6 | type: spark 7 | url: local[*] 8 | config: 9 | spark.sql.catalog.spark_catalog: org.apache.spark.sql.delta.catalog.DeltaCatalog 10 | spark.sql.extensions: io.delta.sql.DeltaSparkSessionExtension 11 | 12 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/spark_connection_config-hudi.yaml: -------------------------------------------------------------------------------- 1 | # Description: Connections Configuration 2 | --- 3 | version: 1 4 | connections: 5 | - id: spark_0 6 | type: spark 7 | url: local[*] 8 | config: 9 | spark.sql.catalog.spark_catalog: org.apache.spark.sql.hudi.catalog.HoodieCatalog 10 | spark.sql.extensions: org.apache.spark.sql.hudi.HoodieSparkSessionExtension 11 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/spark_connection_config-iceberg.yaml: -------------------------------------------------------------------------------- 1 | # Description: Connections Configuration 2 | --- 3 | version: 1 4 | connections: 5 | - id: spark_0 6 | type: spark 7 | url: local[*] 8 | config: 9 | spark.sql.catalog.spark_catalog: org.apache.iceberg.spark.SparkSessionCatalog 10 | spark.sql.extensions: org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions 11 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/telemetry_config.yaml: -------------------------------------------------------------------------------- 1 | # Description: Telemetry Configuration 2 | --- 3 | version: 1 4 | connection: 5 | id: duckdb_0 6 | driver: org.duckdb.DuckDBDriver 7 | url: jdbc:duckdb:./telemetry 8 | execute_ddl: true 9 | ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql' 10 | insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql' 11 | # The following parameter values will be used to replace the variables in the logging statements. 
12 | parameter_values: 13 | data_path: '' 14 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/w_all_tpcds_single_session-delta.yaml: -------------------------------------------------------------------------------- 1 | # Description: Workload for test: All task types, TPC-DS, Delta 2 | --- 3 | version: 1 4 | id: w_all_tpcds_single_session 5 | phases: 6 | - id: all 7 | sessions: 8 | - tasks: 9 | - template_id: setup 10 | - template_id: setup_data_maintenance 11 | - template_id: init 12 | - template_id: build 13 | - template_id: single_user 14 | - template_id: data_maintenance_delta 15 | - template_id: optimize_delta 16 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/w_all_tpcds_single_session-hudi.yaml: -------------------------------------------------------------------------------- 1 | # Description: Workload for test: All task types, TPC-DS, Hudi 2 | --- 3 | version: 1 4 | id: w_all_tpcds_single_session 5 | phases: 6 | - id: all 7 | sessions: 8 | - tasks: 9 | - template_id: setup 10 | - template_id: setup_data_maintenance 11 | - template_id: init 12 | - template_id: build 13 | replace_regex: 14 | - pattern: '(?i)varchar\(.*\)|char\(.*\)' 15 | replacement: 'string' 16 | - template_id: single_user 17 | - template_id: data_maintenance_hudi 18 | - template_id: optimize_hudi 19 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/w_all_tpcds_single_session-iceberg.yaml: -------------------------------------------------------------------------------- 1 | # Description: Workload for test: All task types, TPC-DS, Iceberg 2 | --- 3 | version: 1 4 | id: w_all_tpcds_single_session 5 | phases: 6 | - id: all 7 | sessions: 8 | - tasks: 9 | - template_id: setup 10 | - template_id: setup_data_maintenance 11 | - template_id: init 12 | - template_id: build 13 | replace_regex: 14 | - pattern: '(?i)options\((.|\n)*?\)' 15 | replacement: '' 16 | - template_id: single_user 17 | - template_id: data_maintenance_iceberg 18 | - template_id: optimize_iceberg 19 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/w_all_tpch-delta.yaml: -------------------------------------------------------------------------------- 1 | # Description: Workload for test: All task types, TPC-H, Delta 2 | --- 3 | version: 1 4 | id: w_all_tpch 5 | phases: 6 | - id: setup 7 | sessions: 8 | - tasks: 9 | - template_id: setup 10 | - id: setup_data_maintenance 11 | sessions: 12 | - tasks: 13 | - template_id: setup_data_maintenance 14 | - id: init 15 | sessions: 16 | - tasks: 17 | - template_id: init 18 | - id: build 19 | sessions: 20 | - tasks: 21 | - template_id: build 22 | - id: single_user 23 | sessions: 24 | - tasks: 25 | - template_id: single_user 26 | - id: data_maintenance_1 27 | sessions: 28 | - tasks: 29 | - template_id: data_maintenance_1 30 | - id: data_maintenance_2 31 | sessions: 32 | - tasks: 33 | - template_id: data_maintenance_2_merge 34 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/w_faulty_query_test.yaml: -------------------------------------------------------------------------------- 1 | # Description: Workload for test: Failure handling via SkipFailedQueryTaskExecutor 2 | --- 3 | version: 1 4 | id: w_faulty_query_test 5 | phases: 6 | - id: test 7 | sessions: 8 | - tasks: 9 
| - prepared_task_id: task_faulty_query 10 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/w_faulty_query_test2.yaml: -------------------------------------------------------------------------------- 1 | # Description: Workload for test: Failure handling via SkipFailedQueryTaskExecutor 2 | --- 3 | version: 1 4 | id: w_faulty_query_test 5 | phases: 6 | - id: test 7 | sessions: 8 | - tasks: 9 | - template_id: faulty_query 10 | -------------------------------------------------------------------------------- /core/src/test/resources/config/spark/w_retry_query_test.yaml: -------------------------------------------------------------------------------- 1 | # Description: Workload for test: Failure handling via SkipFailedQueryTaskExecutor 2 | --- 3 | version: 1 4 | id: w_retry_query_test 5 | phases: 6 | - id: test 7 | sessions: 8 | - tasks: 9 | - template_id: retry_query 10 | task_executor_arguments: 11 | retry_erroneous_query_strings: testError -------------------------------------------------------------------------------- /core/src/test/resources/scripts/faulty_test_query.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | wrong_column1 AS ctr_customer_sk, 3 | wrong_column2 AS ctr_store_sk, 4 | SUM( SR_RETURN_AMT_INC_TAX ) AS ctr_total_return 5 | FROM 6 | ${catalog}.${database}.store_returns ${asof}, 7 | ${catalog}.${database}.date_dim; 8 | -------------------------------------------------------------------------------- /core/src/test/resources/scripts/retry_test_query.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM test; -------------------------------------------------------------------------------- /docs/20240609-LSTBench-DBTest24.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lst-bench/8e8c8592d4763c2dd58d7e28e78e6dd1dada0a8e/docs/20240609-LSTBench-DBTest24.pdf -------------------------------------------------------------------------------- /launcher.ps1: -------------------------------------------------------------------------------- 1 | # Constants 2 | $LST_BENCH_HOME = Get-Location 3 | $LST_BENCH_CLASSPATH = "$LST_BENCH_HOME\core\target\*;$LST_BENCH_HOME\core\target\lib\*;$LST_BENCH_HOME\core\target\classes\*" 4 | 5 | # Run Java command 6 | java -cp $LST_BENCH_CLASSPATH com.microsoft.lst_bench.Driver $args 7 | -------------------------------------------------------------------------------- /launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Constants 4 | # Directory of the script 5 | export LST_BENCH_HOME="$(dirname "$(readlink -f "$0")")" 6 | LST_BENCH_CLASSPATH="$LST_BENCH_HOME/core/target/*:$LST_BENCH_HOME/core/target/lib/*:$LST_BENCH_HOME/core/target/classes/*" 7 | 8 | java -cp ${LST_BENCH_CLASSPATH} com.microsoft.lst_bench.Driver "$@" 9 | --------------------------------------------------------------------------------
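Note on running the benchmark: both launcher.ps1 and launcher.sh only assemble the classpath from core/target and forward every argument to com.microsoft.lst_bench.Driver, so a run is driven entirely by the configuration files passed on the command line (connections, experiment, telemetry, task library, and workload, such as the sample files under core/src/main/resources/config shown above). A minimal sketch of an invocation follows; the -c/-e/-t/-l/-w option names and the placeholder experiment, library, and workload paths are assumptions for illustration and are not taken from this dump.

# Build the core module first so core/target holds the driver and its dependencies
# (standard Maven invocation; adjust to your build setup).
mvn -pl core -am package -DskipTests

# Hypothetical invocation: -c connections, -e experiment, -t telemetry,
# -l task library, -w workload (option names assumed, paths are placeholders).
./launcher.sh \
  -c core/src/main/resources/config/spark/sample_connections_config.yaml \
  -e <path-to-experiment-config>.yaml \
  -t core/src/main/resources/config/spark/sample_telemetry_config.yaml \
  -l <path-to-task-library>.yaml \
  -w <path-to-workload>.yaml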