├── README.md ├── energy ├── GEF2012-wind-forecasting │ ├── data_link.txt │ ├── queries │ │ └── GEF2012-wind-forecasting_query.sql │ └── tables │ │ └── GEF2012-wind-forecasting_schema.sql ├── ashrae-energy-prediction │ ├── data_link.txt │ ├── queries │ │ └── ashrae-energy-prediction_query.sql │ └── tables │ │ └── ashrae-energy-prediction_schema.sql ├── electric-power-consumption │ ├── data_link.txt │ ├── queries │ │ └── electric-power-consumption_query.sql │ └── tables │ │ └── electric-power-consumption_schema.sql └── energydata_complete │ ├── data_link.txt │ ├── queries │ └── energydata_complete_query.sql │ └── tables │ └── energydata_complete_schema.sql ├── finance ├── GiveMeSomeCredit │ ├── data_link.txt │ ├── queries │ │ ├── GiveMeSomeCredit_query_classification.sql │ │ └── GiveMeSomeCredit_query_regression.sql │ └── tables │ │ └── GiveMeSomeCredit_schema.sql ├── allstate-claims-severity │ ├── data_link.txt │ ├── queries │ │ └── allstate-claims-severity_query.sql │ └── tables │ │ └── allstate-claims-severity_schema.sql ├── daily-financial-news │ ├── data_link.txt │ └── tables │ │ └── daily-financial-news_schema.sql ├── dow_jones_index │ ├── data_link.txt │ ├── queries │ │ └── dow_jones_index_query.sql │ └── tables │ │ └── dow_jones_index_schema.sql ├── homesite-quote-conversion │ ├── data_link.txt │ ├── queries │ │ ├── homesite-quote-conversion_query_classification.sql │ │ └── homesite-quote-conversion_query_regression.sql │ └── tables │ │ └── homesite-quote-conversion_schema.sql ├── house-rent-prediction-dataset │ ├── data_link.txt │ ├── queries │ │ └── house-rent-prediction-dataset_query.sql │ └── tables │ │ └── house-rent-prediction-dataset_schema.sql ├── porto-seguro-safe-driver-prediction │ ├── data_link.txt │ ├── queries │ │ ├── porto-seguro-safe-driver-prediction_query_classification.sql │ │ └── porto-seguro-safe-driver-prediction_query_regression.sql │ └── tables │ │ └── porto-seguro-safe-driver-prediction_schema.sql ├── 
recruit-restaurant-visitor-forecasting │ ├── data_link.txt │ ├── queries │ │ └── recruit-restaurant-visitor-forecasting_query.sql │ └── tables │ │ └── recruit-restaurant-visitor-forecasting_schema.sql ├── restaurant-revenue-prediction │ ├── data_link.txt │ ├── queries │ │ └── restaurant-revenue-prediction_query.sql │ └── tables │ │ └── restaurant-revenue-prediction_schema.sql ├── robinhood-stock-data │ ├── data_link.txt │ ├── queries │ │ └── robinhood-stock-data_query.sql │ └── tables │ │ └── robinhood-stock-data_schema.sql ├── santander-customer-satisfaction │ ├── data_link.txt │ ├── queries │ │ ├── santander-customer-satisfaction_query_classification.sql │ │ └── santander-customer-satisfaction_query_regression.sql │ └── tables │ │ └── santander-customer-satisfaction_schema.sql ├── sberbank-russian-housing-market │ ├── data_link.txt │ ├── queries │ │ └── sberbank-russian-housing-market_query.sql │ └── tables │ │ └── sberbank-russian-housing-market_schema.sql └── tiantian │ ├── data_link.txt │ ├── queries │ └── tiantian_query.sql │ └── tables │ └── tiantian_schema.sql ├── health ├── big-data-derby-2022 │ ├── data_link.txt │ ├── queries │ │ └── big-data-derby-2022_query.sql │ └── tables │ │ └── big-data-derby-2022_schema.sql ├── covid19-global-forecasting-week-2 │ ├── data_link.txt │ ├── queries │ │ └── covid19-global-forecasting-week-2_query.sql │ └── tables │ │ └── covid19-global-forecasting-week-2_schema.sql ├── covid19-global-forecasting-week-3 │ ├── data_link.txt │ ├── queries │ │ └── covid19-global-forecasting-week-3_query.sql │ └── tables │ │ └── covid19-global-forecasting-week-3_schema.sql ├── covid19-global-forecasting-week-4 │ ├── data_link.txt │ ├── queries │ │ └── covid19-global-forecasting-week-4_query.sql │ └── tables │ │ └── covid19-global-forecasting-week-4_schema.sql ├── covid19-global-forecasting-week-5 │ ├── data_link.txt │ ├── queries │ │ └── covid19-global-forecasting-week-5_query.sql │ └── tables │ │ └── 
covid19-global-forecasting-week-5_schema.sql └── predict-west-nile-virus │ ├── data_link.txt │ ├── queries │ └── predict-west-nile-virus_query.sql │ └── tables │ └── predict-west-nile-virus_schema.sql ├── media ├── detecting-insults-in-social-commentary │ ├── data_link.txt │ ├── queries │ │ ├── detecting-insults-in-social-commentary_query_classification.sql │ │ └── detecting-insults-in-social-commentary_query_regression.sql │ └── tables │ │ └── detecting-insults-in-social-commentary_schema.sql ├── spotify-app-reviews-2022 │ ├── data_link.txt │ ├── queries │ │ └── spotify-app-reviews-2022_query.sql │ └── tables │ │ └── spotify-app-reviews-2022_schema.sql └── twitter-threads │ ├── data_link.txt │ ├── queries │ └── twitter-threads_query.sql │ └── tables │ └── twitter-threads_schema.sql ├── meteorology └── historicalweatherdataforindiancities │ ├── data_link.txt │ ├── queries │ └── historicalweatherdataforindiancities_query.sql │ └── tables │ └── historicalweatherdataforindiancities_schema.sql ├── others ├── DontGetKicked │ ├── data_link.txt │ ├── queries │ │ └── DontGetKicked_query.sql │ └── tables │ │ └── DontGetKicked_schema.sql ├── Hybrid_Indoor_Positioning │ ├── data_link.txt │ └── tables │ │ └── Hybrid_Indoor_Positioning_schema.sql ├── RSSI_dataset │ ├── data_link.txt │ └── tables │ │ └── RSSI_dataset_schema.sql ├── airbnb-recruiting-new-user-bookings │ ├── data_link.txt │ ├── queries │ │ └── airbnb-recruiting-new-user-bookings_query.sql │ └── tables │ │ └── airbnb-recruiting-new-user-bookings_schema.sql ├── bike-sharing-demand │ ├── data_link.txt │ ├── queries │ │ └── bike-sharing-demand_query.sql │ └── tables │ │ └── bike-sharing-demand_schema.sql ├── cyclistic-bike-share-user-dataset-1-year │ ├── data_link.txt │ ├── queries │ │ └── cyclistic-bike-share-user-dataset-1-year_query.sql │ └── tables │ │ └── cyclistic-bike-share-user-dataset-1-year_schema.sql ├── data-science-job-salaries │ ├── data_link.txt │ └── tables │ │ └── data-science-job-salaries_schema.sql 
├── expedia-hotel-recommendations │ ├── data_link.txt │ ├── queries │ │ └── expedia-hotel-recommendations_query.sql │ └── tables │ │ └── expedia-hotel-recommendations_schema.sql ├── foursquare-location-matching │ ├── data_link.txt │ └── tables │ │ └── foursquare-location-matching_schema.sql ├── korean-baseball-pitching-data-1982-2021 │ ├── data_link.txt │ ├── queries │ │ └── korean-baseball-pitching-data-1982-2021_query.sql │ └── tables │ │ └── korean-baseball-pitching-data-1982-2021_schema.sql ├── sf-crime │ ├── data_link.txt │ └── tables │ │ └── sf-crime_schema.sql ├── talkingdata-mobile-user-demographics │ ├── data_link.txt │ └── tables │ │ └── talkingdata-mobile-user-demographics_schema.sql └── unimelb │ ├── data_link.txt │ ├── queries │ ├── unimelb_query_classification.sql │ └── unimelb_query_regression.sql │ └── tables │ └── unimelb_schema.sql ├── retails ├── competitive-data-science-predict-future-sales │ ├── data_link.txt │ ├── queries │ │ └── competitive-data-science-predict-future-sales_query.sql │ └── tables │ │ └── competitive-data-science-predict-future-sales_schema.sql ├── coupon-purchase-prediction │ ├── data_link.txt │ ├── queries │ │ └── coupon-purchase-prediction_query.sql │ └── tables │ │ └── coupon-purchase-prediction_schema.sql ├── ecommerce-customerssales-record │ ├── data_link.txt │ ├── queries │ │ └── ecommerce-customerssales-record_query.sql │ └── tables │ │ └── ecommerce-customerssales-record_schema.sql ├── favorita-grocery-sales-forecasting │ ├── data_link.txt │ ├── queries │ │ └── favorita-grocery-sales-forecasting_query.sql │ └── tables │ │ └── favorita-grocery-sales-forecasting_schema.sql ├── goods │ ├── data_link.txt │ └── tables │ │ ├── CreateIndexes.sql │ │ ├── CreatePrimaryKeys.sql │ │ └── CreateTables.sql ├── grupo-bimbo-inventory-demand │ ├── data_link.txt │ ├── queries │ │ └── grupo-bimbo-inventory-demand_query.sql │ └── tables │ │ └── grupo-bimbo-inventory-demand_schema.sql ├── m5-forecasting-accuracy │ ├── data_link.txt │ ├── 
queries │ │ └── m5-forecasting-accuracy_query.sql │ └── tables │ │ └── m5-forecasting-accuracy_schema.sql ├── m5-forecasting-uncertainty │ ├── data_link.txt │ ├── queries │ │ └── m5-forecasting-uncertainty_query.sql │ └── tables │ │ └── m5-forecasting-uncertainty_schema.sql ├── material │ ├── data_link.txt │ └── tables │ │ ├── CreateIndexes.sql │ │ ├── CreatePrimaryKeys.sql │ │ └── CreateTables.sql ├── orders │ ├── data_link.txt │ └── tables │ │ ├── CreateIndexes.sql │ │ ├── CreatePrimaryKeys.sql │ │ └── CreateTables.sql ├── rossmann-store-sales │ ├── data_link.txt │ ├── queries │ │ └── rossmann-store-sales_query.sql │ └── tables │ │ └── rossmann-store-sales_schema.sql ├── shopmall │ ├── data_link.txt │ └── tables │ │ ├── CreateIndexes.sql │ │ ├── CreatePrimaryKeys.sql │ │ └── CreateTables.sql ├── store-sales-time-series-forecasting │ ├── data_link.txt │ ├── queries │ │ └── store-sales-time-series-forecasting_query.sql │ └── tables │ │ └── store-sales-time-series-forecasting_schema.sql ├── transaction │ ├── data_link.txt │ └── tables │ │ ├── CreateIndexes.sql │ │ ├── CreatePrimaryKeys.sql │ │ └── CreateTables.sql └── walmart-recruiting-sales-in-stormy-weather │ ├── data_link.txt │ ├── queries │ └── walmart-recruiting-sales-in-stormy-weather_query.sql │ └── tables │ └── walmart-recruiting-sales-in-stormy-weather_schema.sql └── transport ├── nyc-taxi-trip-duration ├── data_link.txt ├── queries │ └── nyc-taxi-trip-duration_query.sql └── tables │ └── nyc-taxi-trip-duration_schema.sql └── taxi-trajectory ├── data_link.txt ├── queries └── taxi-trajectory_query.sql └── tables └── taxi-trajectory_schema.sql /energy/GEF2012-wind-forecasting/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/benchmark.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/test.csv 3 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/train.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/windforecasts_wf1.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/windforecasts_wf2.csv 6 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/windforecasts_wf3.csv 7 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/windforecasts_wf4.csv 8 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/windforecasts_wf5.csv 9 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/windforecasts_wf6.csv 10 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/GEF2012-wind-forecasting/windforecasts_wf7.csv 11 | -------------------------------------------------------------------------------- /energy/GEF2012-wind-forecasting/queries/GEF2012-wind-forecasting_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `date` as date_1, 7 | `date` as train_date_original_0, 8 | `wp7` as train_wp7_original_2, 9 | `wp1` as train_wp1_original_32, 10 | `wp2` as train_wp2_original_33, 11 | `wp3` as train_wp3_original_34, 12 | `wp4` as train_wp4_original_35, 13 | `wp5` as train_wp5_original_36, 14 | `wp6` as train_wp6_original_37, 15 | log(`wp6`) as train_wp6_log_38, 16 | log(`wp3`) as train_wp3_log_39, 17 | dayofweek(timestamp(`date`)) as train_date_dayofweek_40, 18 | hour(timestamp(`date`)) as train_date_hourofday_41, 19 | case when 1 < dayofweek(timestamp(`date`)) and dayofweek(timestamp(`date`)) < 7 then 1 else 0 end as train_date_isweekday_42, 20 | log(`wp2`) as train_wp2_log_43 21 | from 22 | `train` 23 | ) 24 | as out0 25 | last join 26 | ( 27 | select 28 | `train`.`date` as date_3, 29 | `windforecasts_wf1_date`.`hors` as windforecasts_wf1_hors_multi_direct_3, 
30 | `windforecasts_wf1_date`.`u` as windforecasts_wf1_u_multi_direct_4, 31 | `windforecasts_wf1_date`.`v` as windforecasts_wf1_v_multi_direct_5, 32 | `windforecasts_wf1_date`.`wd` as windforecasts_wf1_wd_multi_direct_6, 33 | `windforecasts_wf1_date`.`ws` as windforecasts_wf1_ws_multi_direct_7, 34 | `windforecasts_wf2_date`.`u` as windforecasts_wf2_u_multi_direct_8, 35 | `windforecasts_wf2_date`.`v` as windforecasts_wf2_v_multi_direct_9, 36 | `windforecasts_wf2_date`.`wd` as windforecasts_wf2_wd_multi_direct_10, 37 | `windforecasts_wf2_date`.`ws` as windforecasts_wf2_ws_multi_direct_11, 38 | `windforecasts_wf3_date`.`u` as windforecasts_wf3_u_multi_direct_12, 39 | `windforecasts_wf3_date`.`v` as windforecasts_wf3_v_multi_direct_13, 40 | `windforecasts_wf3_date`.`wd` as windforecasts_wf3_wd_multi_direct_14, 41 | `windforecasts_wf3_date`.`ws` as windforecasts_wf3_ws_multi_direct_15, 42 | `windforecasts_wf4_date`.`u` as windforecasts_wf4_u_multi_direct_16, 43 | `windforecasts_wf4_date`.`v` as windforecasts_wf4_v_multi_direct_17, 44 | `windforecasts_wf4_date`.`wd` as windforecasts_wf4_wd_multi_direct_18, 45 | `windforecasts_wf4_date`.`ws` as windforecasts_wf4_ws_multi_direct_19, 46 | `windforecasts_wf5_date`.`u` as windforecasts_wf5_u_multi_direct_20, 47 | `windforecasts_wf5_date`.`v` as windforecasts_wf5_v_multi_direct_21, 48 | `windforecasts_wf5_date`.`wd` as windforecasts_wf5_wd_multi_direct_22, 49 | `windforecasts_wf5_date`.`ws` as windforecasts_wf5_ws_multi_direct_23, 50 | `windforecasts_wf6_date`.`u` as windforecasts_wf6_u_multi_direct_24, 51 | `windforecasts_wf6_date`.`v` as windforecasts_wf6_v_multi_direct_25, 52 | `windforecasts_wf6_date`.`wd` as windforecasts_wf6_wd_multi_direct_26, 53 | `windforecasts_wf6_date`.`ws` as windforecasts_wf6_ws_multi_direct_27, 54 | `windforecasts_wf7_date`.`u` as windforecasts_wf7_u_multi_direct_28, 55 | `windforecasts_wf7_date`.`v` as windforecasts_wf7_v_multi_direct_29, 56 | `windforecasts_wf7_date`.`wd` as 
windforecasts_wf7_wd_multi_direct_30, 57 | `windforecasts_wf7_date`.`ws` as windforecasts_wf7_ws_multi_direct_31 58 | from 59 | `train` 60 | last join `windforecasts_wf1` as `windforecasts_wf1_date` on `train`.`date` = `windforecasts_wf1_date`.`date` 61 | last join `windforecasts_wf2` as `windforecasts_wf2_date` on `train`.`date` = `windforecasts_wf2_date`.`date` 62 | last join `windforecasts_wf3` as `windforecasts_wf3_date` on `train`.`date` = `windforecasts_wf3_date`.`date` 63 | last join `windforecasts_wf4` as `windforecasts_wf4_date` on `train`.`date` = `windforecasts_wf4_date`.`date` 64 | last join `windforecasts_wf5` as `windforecasts_wf5_date` on `train`.`date` = `windforecasts_wf5_date`.`date` 65 | last join `windforecasts_wf6` as `windforecasts_wf6_date` on `train`.`date` = `windforecasts_wf6_date`.`date` 66 | last join `windforecasts_wf7` as `windforecasts_wf7_date` on `train`.`date` = `windforecasts_wf7_date`.`date`) 67 | as out1 68 | on out0.date_1 = out1.date_3 69 | ; -------------------------------------------------------------------------------- /energy/GEF2012-wind-forecasting/tables/GEF2012-wind-forecasting_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE benchmark (id INT, date VARCHAR, wp1 DOUBLE PRECISION, wp2 DOUBLE PRECISION, wp3 DOUBLE PRECISION, wp4 DOUBLE PRECISION, wp5 DOUBLE PRECISION, wp6 DOUBLE PRECISION, wp7 DOUBLE PRECISION); 2 | CREATE TABLE test (id INT, date VARCHAR); 3 | CREATE TABLE train (date VARCHAR, wp1 DOUBLE PRECISION, wp2 DOUBLE PRECISION, wp3 DOUBLE PRECISION, wp4 DOUBLE PRECISION, wp5 DOUBLE PRECISION, wp6 DOUBLE PRECISION, wp7 DOUBLE PRECISION); 4 | CREATE TABLE windforecasts_wf1 (date VARCHAR, hors INT, u VARCHAR, v VARCHAR, ws VARCHAR, wd VARCHAR); 5 | CREATE TABLE windforecasts_wf2 (date VARCHAR, hors INT, u VARCHAR, v VARCHAR, ws VARCHAR, wd VARCHAR); 6 | CREATE TABLE windforecasts_wf3 (date VARCHAR, hors INT, u VARCHAR, v VARCHAR, ws VARCHAR, wd VARCHAR); 
7 | CREATE TABLE windforecasts_wf4 (date VARCHAR, hors INT, u VARCHAR, v VARCHAR, ws VARCHAR, wd VARCHAR); 8 | CREATE TABLE windforecasts_wf5 (date VARCHAR, hors INT, u VARCHAR, v VARCHAR, ws VARCHAR, wd VARCHAR); 9 | CREATE TABLE windforecasts_wf6 (date VARCHAR, hors INT, u VARCHAR, v VARCHAR, ws VARCHAR, wd VARCHAR); 10 | CREATE TABLE windforecasts_wf7 (date VARCHAR, hors INT, u VARCHAR, v VARCHAR, ws VARCHAR, wd VARCHAR); 11 | -------------------------------------------------------------------------------- /energy/ashrae-energy-prediction/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/ashrae-energy-prediction/building_metadata.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/ashrae-energy-prediction/test.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/ashrae-energy-prediction/train.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/ashrae-energy-prediction/weather_test.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/ashrae-energy-prediction/weather_train.csv 6 | -------------------------------------------------------------------------------- /energy/ashrae-energy-prediction/queries/ashrae-energy-prediction_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `row_id` as row_id_1, 7 | `timestamp` as test_timestamp_original_0, 8 | `row_id` as test_row_id_original_1, 9 | `meter` as test_meter_original_2, 10 | `building_id` as test_building_id_original_10, 11 | case when 1 < dayofweek(timestamp(`timestamp`)) and dayofweek(timestamp(`timestamp`)) < 7 then 1 else 0 end as test_timestamp_isweekday_13, 12 | dayofweek(timestamp(`timestamp`)) as test_timestamp_dayofweek_16, 13 | hour(timestamp(`timestamp`)) as test_timestamp_hourofday_18, 14 | `building_id` as test_building_id_combine_19, 15 | 
`building_id` as test_building_id_combine_20 16 | from 17 | `test` 18 | ) 19 | as out0 20 | last join 21 | ( 22 | select 23 | `row_id` as row_id_4, 24 | avg(`meter_reading`) over train_building_id_timestamp_0s_64d_100 as train_meter_reading_multi_avg_3, 25 | distinct_count(`meter`) over train_building_id_timestamp_0s_64d_100 as train_meter_multi_unique_count_9 26 | from 27 | (select `building_id` as `building_id`, int(0) as `meter`, `timestamp` as `timestamp`, double(0) as `meter_reading`, row_id from `test`) 28 | window train_building_id_timestamp_0s_64d_100 as ( 29 | UNION (select `building_id`, `meter`, `timestamp`, `meter_reading`, int(0) as row_id from `train`) partition by `building_id` order by `timestamp` rows_range between 64d open preceding and 0s preceding MAXSIZE 100 INSTANCE_NOT_IN_WINDOW)) 30 | as out1 31 | on out0.row_id_1 = out1.row_id_4 32 | last join 33 | ( 34 | select 35 | `test`.`row_id` as row_id_5, 36 | `building_metadata_building_id`.`floor_count` as building_metadata_floor_count_multi_direct_4, 37 | `building_metadata_building_id`.`primary_use` as building_metadata_primary_use_multi_direct_5, 38 | `building_metadata_building_id`.`site_id` as building_metadata_site_id_multi_direct_6, 39 | `building_metadata_building_id`.`square_feet` as building_metadata_square_feet_multi_direct_7, 40 | `building_metadata_building_id`.`year_built` as building_metadata_year_built_multi_direct_8 41 | from 42 | `test` 43 | last join `building_metadata` as `building_metadata_building_id` on `test`.`building_id` = `building_metadata_building_id`.`building_id`) 44 | as out2 45 | on out0.row_id_1 = out2.row_id_5 46 | ; -------------------------------------------------------------------------------- /energy/ashrae-energy-prediction/tables/ashrae-energy-prediction_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE building_metadata (site_id INT, building_id INT, primary_use VARCHAR, square_feet DOUBLE 
PRECISION, year_built INT, floor_count DOUBLE PRECISION); 2 | CREATE TABLE test (row_id INT, building_id INT, meter INT, timestamp TIMESTAMP); 3 | CREATE TABLE train (building_id INT, meter INT, timestamp TIMESTAMP, meter_reading DOUBLE PRECISION); 4 | CREATE TABLE weather_test (site_id INT, timestamp TIMESTAMP, air_temperature DOUBLE PRECISION, cloud_coverage DOUBLE PRECISION, dew_temperature DOUBLE PRECISION, precip_depth_1_hr DOUBLE PRECISION, sea_level_pressure DOUBLE PRECISION, wind_direction DOUBLE PRECISION, wind_speed DOUBLE PRECISION); 5 | CREATE TABLE weather_train (site_id INT, timestamp TIMESTAMP, air_temperature DOUBLE PRECISION, cloud_coverage DOUBLE PRECISION, dew_temperature DOUBLE PRECISION, precip_depth_1_hr DOUBLE PRECISION, sea_level_pressure DOUBLE PRECISION, wind_direction DOUBLE PRECISION, wind_speed DOUBLE PRECISION); 6 | -------------------------------------------------------------------------------- /energy/electric-power-consumption/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/electric-power-consumption/powerconsumption.csv 2 | -------------------------------------------------------------------------------- /energy/electric-power-consumption/queries/electric-power-consumption_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `Datetime` as Datetime_1, 6 | `Datetime` as powerconsumption_Datetime_original_0, 7 | `PowerConsumption_Zone1` as powerconsumption_PowerConsumption_Zone1_original_2, 8 | `DiffuseFlows` as powerconsumption_DiffuseFlows_original_3, 9 | `GeneralDiffuseFlows` as powerconsumption_GeneralDiffuseFlows_original_4, 10 | `Humidity` as powerconsumption_Humidity_original_5, 11 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_original_6, 12 | `PowerConsumption_Zone3` as 
powerconsumption_PowerConsumption_Zone3_original_7, 13 | `Temperature` as powerconsumption_Temperature_original_8, 14 | `WindSpeed` as powerconsumption_WindSpeed_original_9, 15 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_multiply_10, 16 | `PowerConsumption_Zone3` as powerconsumption_PowerConsumption_Zone3_multiply_10, 17 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_multiply_11, 18 | `PowerConsumption_Zone3` as powerconsumption_PowerConsumption_Zone3_multiply_11, 19 | `WindSpeed` as powerconsumption_WindSpeed_multiply_11, 20 | log(`PowerConsumption_Zone2`) as powerconsumption_PowerConsumption_Zone2_log_12, 21 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_multiply_13, 22 | `PowerConsumption_Zone3` as powerconsumption_PowerConsumption_Zone3_multiply_13, 23 | `Temperature` as powerconsumption_Temperature_multiply_13, 24 | `WindSpeed` as powerconsumption_WindSpeed_divide_14, 25 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_divide_14, 26 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_divide_15, 27 | `WindSpeed` as powerconsumption_WindSpeed_divide_15, 28 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_multiply_16, 29 | `WindSpeed` as powerconsumption_WindSpeed_multiply_16, 30 | log(`PowerConsumption_Zone3`) as powerconsumption_PowerConsumption_Zone3_log_17, 31 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_multiply_18, 32 | `PowerConsumption_Zone3` as powerconsumption_PowerConsumption_Zone3_multiply_18, 33 | `Humidity` as powerconsumption_Humidity_multiply_18, 34 | hour(timestamp(`Datetime`)) as powerconsumption_Datetime_hourofday_19, 35 | case when 1 < dayofweek(timestamp(`Datetime`)) and dayofweek(timestamp(`Datetime`)) < 7 then 1 else 0 end as powerconsumption_Datetime_isweekday_20, 36 | dayofweek(timestamp(`Datetime`)) as powerconsumption_Datetime_dayofweek_21, 37 | `PowerConsumption_Zone3` as 
powerconsumption_PowerConsumption_Zone3_multiply_22, 38 | `WindSpeed` as powerconsumption_WindSpeed_multiply_22, 39 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_divide_23, 40 | `Humidity` as powerconsumption_Humidity_divide_23, 41 | `Humidity` as powerconsumption_Humidity_divide_24, 42 | `PowerConsumption_Zone2` as powerconsumption_PowerConsumption_Zone2_divide_24, 43 | `PowerConsumption_Zone3` as powerconsumption_PowerConsumption_Zone3_divide_25, 44 | `Humidity` as powerconsumption_Humidity_divide_25, 45 | `Humidity` as powerconsumption_Humidity_divide_26, 46 | `PowerConsumption_Zone3` as powerconsumption_PowerConsumption_Zone3_divide_26, 47 | `PowerConsumption_Zone3` as powerconsumption_PowerConsumption_Zone3_divide_27, 48 | `WindSpeed` as powerconsumption_WindSpeed_divide_27 49 | from 50 | `powerconsumption` 51 | ; -------------------------------------------------------------------------------- /energy/electric-power-consumption/tables/electric-power-consumption_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE powerconsumption (Datetime TIMESTAMP, Temperature DOUBLE PRECISION, Humidity DOUBLE PRECISION, WindSpeed DOUBLE PRECISION, GeneralDiffuseFlows DOUBLE PRECISION, DiffuseFlows DOUBLE PRECISION, PowerConsumption_Zone1 DOUBLE PRECISION, PowerConsumption_Zone2 DOUBLE PRECISION, PowerConsumption_Zone3 DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /energy/energydata_complete/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/energy/energydata_complete/energydata_complete.csv 2 | -------------------------------------------------------------------------------- /energy/energydata_complete/queries/energydata_complete_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # 
output table name: sql_table 3 | 4 | select 5 | `id` as id_1, 6 | `date` as energydata_complete_date_original_0, 7 | `id` as energydata_complete_id_original_1, 8 | `lights` as energydata_complete_lights_original_2, 9 | `Appliances` as energydata_complete_Appliances_original_3, 10 | `Press_mm_hg` as energydata_complete_Press_mm_hg_original_4, 11 | `RH_1` as energydata_complete_RH_1_original_5, 12 | `RH_2` as energydata_complete_RH_2_original_6, 13 | `RH_3` as energydata_complete_RH_3_original_7, 14 | `RH_4` as energydata_complete_RH_4_original_8, 15 | `RH_5` as energydata_complete_RH_5_original_9, 16 | `RH_6` as energydata_complete_RH_6_original_10, 17 | `RH_7` as energydata_complete_RH_7_original_11, 18 | `RH_8` as energydata_complete_RH_8_original_12, 19 | `RH_9` as energydata_complete_RH_9_original_13, 20 | `RH_out` as energydata_complete_RH_out_original_14, 21 | `T1` as energydata_complete_T1_original_15, 22 | `T2` as energydata_complete_T2_original_16, 23 | `T3` as energydata_complete_T3_original_17, 24 | `T4` as energydata_complete_T4_original_18, 25 | `T5` as energydata_complete_T5_original_19, 26 | `T6` as energydata_complete_T6_original_20, 27 | `T7` as energydata_complete_T7_original_21, 28 | `T8` as energydata_complete_T8_original_22, 29 | `T9` as energydata_complete_T9_original_23, 30 | `T_out` as energydata_complete_T_out_original_24, 31 | `Tdewpoint` as energydata_complete_Tdewpoint_original_25, 32 | `Visibility` as energydata_complete_Visibility_original_26, 33 | `Windspeed` as energydata_complete_Windspeed_original_27, 34 | `rv1` as energydata_complete_rv1_original_28 35 | from 36 | `energydata_complete` 37 | ; -------------------------------------------------------------------------------- /energy/energydata_complete/tables/energydata_complete_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE energydata_complete (id INT, date TIMESTAMP, Appliances INT, lights INT, T1 DOUBLE PRECISION, RH_1 
DOUBLE PRECISION, T2 DOUBLE PRECISION, RH_2 DOUBLE PRECISION, T3 DOUBLE PRECISION, RH_3 DOUBLE PRECISION, T4 DOUBLE PRECISION, RH_4 DOUBLE PRECISION, T5 DOUBLE PRECISION, RH_5 DOUBLE PRECISION, T6 DOUBLE PRECISION, RH_6 DOUBLE PRECISION, T7 DOUBLE PRECISION, RH_7 DOUBLE PRECISION, T8 DOUBLE PRECISION, RH_8 DOUBLE PRECISION, T9 DOUBLE PRECISION, RH_9 DOUBLE PRECISION, T_out DOUBLE PRECISION, Press_mm_hg DOUBLE PRECISION, RH_out DOUBLE PRECISION, Windspeed DOUBLE PRECISION, Visibility DOUBLE PRECISION, Tdewpoint DOUBLE PRECISION, rv1 DOUBLE PRECISION, rv2 DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /finance/GiveMeSomeCredit/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/GiveMeSomeCredit/cs-training.csv 2 | -------------------------------------------------------------------------------- /finance/GiveMeSomeCredit/tables/GiveMeSomeCredit_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE cs_training (timestamp TIMESTAMP, id INT, SeriousDlqin2yrs INT, RevolvingUtilizationOfUnsecuredLines DOUBLE PRECISION, age INT, NumberOfTime30_59DaysPastDueNotWorse INT, DebtRatio DOUBLE PRECISION, MonthlyIncome VARCHAR, NumberOfOpenCreditLinesAndLoans INT, NumberOfTimes90DaysLate INT, NumberRealEstateLoansOrLines INT, NumberOfTime60_89DaysPastDueNotWorse INT, NumberOfDependents VARCHAR); 2 | -------------------------------------------------------------------------------- /finance/allstate-claims-severity/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/allstate-claims-severity/train.csv 2 | -------------------------------------------------------------------------------- /finance/allstate-claims-severity/tables/allstate-claims-severity_schema.sql: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE train (timestamp TIMESTAMP, id INT, cat1 VARCHAR, cat2 VARCHAR, cat3 VARCHAR, cat4 VARCHAR, cat5 VARCHAR, cat6 VARCHAR, cat7 VARCHAR, cat8 VARCHAR, cat9 VARCHAR, cat10 VARCHAR, cat11 VARCHAR, cat12 VARCHAR, cat13 VARCHAR, cat14 VARCHAR, cat15 VARCHAR, cat16 VARCHAR, cat17 VARCHAR, cat18 VARCHAR, cat19 VARCHAR, cat20 VARCHAR, cat21 VARCHAR, cat22 VARCHAR, cat23 VARCHAR, cat24 VARCHAR, cat25 VARCHAR, cat26 VARCHAR, cat27 VARCHAR, cat28 VARCHAR, cat29 VARCHAR, cat30 VARCHAR, cat31 VARCHAR, cat32 VARCHAR, cat33 VARCHAR, cat34 VARCHAR, cat35 VARCHAR, cat36 VARCHAR, cat37 VARCHAR, cat38 VARCHAR, cat39 VARCHAR, cat40 VARCHAR, cat41 VARCHAR, cat42 VARCHAR, cat43 VARCHAR, cat44 VARCHAR, cat45 VARCHAR, cat46 VARCHAR, cat47 VARCHAR, cat48 VARCHAR, cat49 VARCHAR, cat50 VARCHAR, cat51 VARCHAR, cat52 VARCHAR, cat53 VARCHAR, cat54 VARCHAR, cat55 VARCHAR, cat56 VARCHAR, cat57 VARCHAR, cat58 VARCHAR, cat59 VARCHAR, cat60 VARCHAR, cat61 VARCHAR, cat62 VARCHAR, cat63 VARCHAR, cat64 VARCHAR, cat65 VARCHAR, cat66 VARCHAR, cat67 VARCHAR, cat68 VARCHAR, cat69 VARCHAR, cat70 VARCHAR, cat71 VARCHAR, cat72 VARCHAR, cat73 VARCHAR, cat74 VARCHAR, cat75 VARCHAR, cat76 VARCHAR, cat77 VARCHAR, cat78 VARCHAR, cat79 VARCHAR, cat80 VARCHAR, cat81 VARCHAR, cat82 VARCHAR, cat83 VARCHAR, cat84 VARCHAR, cat85 VARCHAR, cat86 VARCHAR, cat87 VARCHAR, cat88 VARCHAR, cat89 VARCHAR, cat90 VARCHAR, cat91 VARCHAR, cat92 VARCHAR, cat93 VARCHAR, cat94 VARCHAR, cat95 VARCHAR, cat96 VARCHAR, cat97 VARCHAR, cat98 VARCHAR, cat99 VARCHAR, cat100 VARCHAR, cat101 VARCHAR, cat102 VARCHAR, cat103 VARCHAR, cat104 VARCHAR, cat105 VARCHAR, cat106 VARCHAR, cat107 VARCHAR, cat108 VARCHAR, cat109 VARCHAR, cat110 VARCHAR, cat111 VARCHAR, cat112 VARCHAR, cat113 VARCHAR, cat114 VARCHAR, cat115 VARCHAR, cat116 VARCHAR, cont1 DOUBLE PRECISION, cont2 DOUBLE PRECISION, cont3 DOUBLE PRECISION, cont4 DOUBLE PRECISION, cont5 DOUBLE 
PRECISION, cont6 DOUBLE PRECISION, cont7 DOUBLE PRECISION, cont8 DOUBLE PRECISION, cont9 DOUBLE PRECISION, cont10 DOUBLE PRECISION, cont11 DOUBLE PRECISION, cont12 DOUBLE PRECISION, cont13 DOUBLE PRECISION, cont14 DOUBLE PRECISION, loss DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /finance/daily-financial-news/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/daily-financial-news/raw_analyst_ratings.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/daily-financial-news/raw_partner_headlines.csv 3 | -------------------------------------------------------------------------------- /finance/daily-financial-news/tables/daily-financial-news_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE raw_analyst_ratings (id INT, headline VARCHAR, url VARCHAR, publisher VARCHAR, date TIMESTAMP, stock VARCHAR); 2 | CREATE TABLE raw_partner_headlines (id INT, headline VARCHAR, url VARCHAR, publisher VARCHAR, date TIMESTAMP, stock VARCHAR); 3 | -------------------------------------------------------------------------------- /finance/dow_jones_index/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/dow_jones_index/dow_jones_index.csv 2 | -------------------------------------------------------------------------------- /finance/dow_jones_index/tables/dow_jones_index_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE dow_jones_index (quarter INT, stock VARCHAR, date TIMESTAMP, open DOUBLE PRECISION, high DOUBLE PRECISION, low DOUBLE PRECISION, close DOUBLE PRECISION, volume INT, percent_change_price DOUBLE PRECISION, percent_change_volume_over_last_wk DOUBLE PRECISION, previous_weeks_volume INT, 
next_weeks_open DOUBLE PRECISION, next_weeks_close DOUBLE PRECISION, percent_change_next_weeks_price DOUBLE PRECISION, days_to_next_dividend INT, percent_return_next_dividend DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /finance/homesite-quote-conversion/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/homesite-quote-conversion/train.csv 2 | -------------------------------------------------------------------------------- /finance/house-rent-prediction-dataset/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/house-rent-prediction-dataset/House_Rent_Dataset.csv 2 | -------------------------------------------------------------------------------- /finance/house-rent-prediction-dataset/tables/house-rent-prediction-dataset_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE House_Rent_Dataset (id INT, Posted_On TIMESTAMP, BHK INT, Rent INT, Size INT, Floor VARCHAR, Area_Type VARCHAR, Area_Locality VARCHAR, City VARCHAR, Furnishing_Status VARCHAR, Tenant_Preferred VARCHAR, Bathroom INT, Point_of_Contact VARCHAR); 2 | -------------------------------------------------------------------------------- /finance/porto-seguro-safe-driver-prediction/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/porto-seguro-safe-driver-prediction/train.csv 2 | -------------------------------------------------------------------------------- /finance/porto-seguro-safe-driver-prediction/tables/porto-seguro-safe-driver-prediction_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE train (timestamp TIMESTAMP, id INT, target 
INT, ps_ind_01 INT, ps_ind_02_cat INT, ps_ind_03 INT, ps_ind_04_cat INT, ps_ind_05_cat INT, ps_ind_06_bin INT, ps_ind_07_bin INT, ps_ind_08_bin INT, ps_ind_09_bin INT, ps_ind_10_bin INT, ps_ind_11_bin INT, ps_ind_12_bin INT, ps_ind_13_bin INT, ps_ind_14 INT, ps_ind_15 INT, ps_ind_16_bin INT, ps_ind_17_bin INT, ps_ind_18_bin INT, ps_reg_01 DOUBLE PRECISION, ps_reg_02 DOUBLE PRECISION, ps_reg_03 DOUBLE PRECISION, ps_car_01_cat INT, ps_car_02_cat INT, ps_car_03_cat INT, ps_car_04_cat INT, ps_car_05_cat INT, ps_car_06_cat INT, ps_car_07_cat INT, ps_car_08_cat INT, ps_car_09_cat INT, ps_car_10_cat INT, ps_car_11_cat INT, ps_car_11 INT, ps_car_12 DOUBLE PRECISION, ps_car_13 DOUBLE PRECISION, ps_car_14 DOUBLE PRECISION, ps_car_15 DOUBLE PRECISION, ps_calc_01 DOUBLE PRECISION, ps_calc_02 DOUBLE PRECISION, ps_calc_03 DOUBLE PRECISION, ps_calc_04 INT, ps_calc_05 INT, ps_calc_06 INT, ps_calc_07 INT, ps_calc_08 INT, ps_calc_09 INT, ps_calc_10 INT, ps_calc_11 INT, ps_calc_12 INT, ps_calc_13 INT, ps_calc_14 INT, ps_calc_15_bin INT, ps_calc_16_bin INT, ps_calc_17_bin INT, ps_calc_18_bin INT, ps_calc_19_bin INT, ps_calc_20_bin INT); 2 | -------------------------------------------------------------------------------- /finance/recruit-restaurant-visitor-forecasting/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/recruit-restaurant-visitor-forecasting/air_reserve.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/recruit-restaurant-visitor-forecasting/air_store_info.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/recruit-restaurant-visitor-forecasting/air_visit_data.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/recruit-restaurant-visitor-forecasting/date_info.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/recruit-restaurant-visitor-forecasting/hpg_reserve.csv 6 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/recruit-restaurant-visitor-forecasting/hpg_store_info.csv 7 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/recruit-restaurant-visitor-forecasting/store_id_relation.csv 8 | -------------------------------------------------------------------------------- /finance/recruit-restaurant-visitor-forecasting/queries/recruit-restaurant-visitor-forecasting_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `air_store_id` as air_store_id_1, 7 | `visit_date` as air_visit_data_visit_date_original_0, 8 | `air_store_id` as air_visit_data_air_store_id_original_1, 9 | `visitors` as air_visit_data_visitors_original_2, 10 | dayofweek(timestamp(`visit_date`)) as air_visit_data_visit_date_dayofweek_9, 11 | case when 1 < dayofweek(timestamp(`visit_date`)) and dayofweek(timestamp(`visit_date`)) < 7 then 1 else 0 end as air_visit_data_visit_date_isweekday_10, 12 | hour(timestamp(`visit_date`)) as air_visit_data_visit_date_hourofday_14 13 | from 14 | `air_visit_data` 15 | ) 16 | as out0 17 | last join 18 | ( 19 | select 20 | `air_visit_data`.`air_store_id` as air_store_id_4, 21 | `air_store_info_air_store_id`.`air_area_name` as air_store_info_air_area_name_multi_direct_3, 22 | `air_store_info_air_store_id`.`air_genre_name` as air_store_info_air_genre_name_multi_direct_4, 23 | `air_store_info_air_store_id`.`latitude` as air_store_info_latitude_multi_direct_5, 24 | `store_id_relation_air_store_id`.`hpg_store_id` as store_id_relation_hpg_store_id_multi_direct_6 25 | from 26 | `air_visit_data` 27 | last join `air_store_info` as `air_store_info_air_store_id` on `air_visit_data`.`air_store_id` = `air_store_info_air_store_id`.`air_store_id` 28 | last join `store_id_relation` as `store_id_relation_air_store_id` on `air_visit_data`.`air_store_id` = `store_id_relation_air_store_id`.`air_store_id`) 29 | as 
out1 30 | on out0.air_store_id_1 = out1.air_store_id_4 31 | last join 32 | ( 33 | select 34 | `air_store_id` as air_store_id_8, 35 | fz_topn_frequency(`reserve_visitors`, 3) over air_reserve_air_store_id_visit_datetime_0s_64d_100 as air_reserve_reserve_visitors_multi_top3frequency_7, 36 | distinct_count(`reserve_visitors`) over air_reserve_air_store_id_visit_datetime_0s_64d_100 as air_reserve_reserve_visitors_multi_unique_count_8 37 | from 38 | (select `air_store_id` as `air_store_id`, `visit_date` as `visit_datetime`, timestamp('2019-07-18 09:20:20') as `reserve_datetime`, int(0) as `reserve_visitors` from `air_visit_data`) 39 | window air_reserve_air_store_id_visit_datetime_0s_64d_100 as ( 40 | UNION `air_reserve` partition by `air_store_id` order by `visit_datetime` rows_range between 64d open preceding and 0s preceding MAXSIZE 100 INSTANCE_NOT_IN_WINDOW)) 41 | as out2 42 | on out0.air_store_id_1 = out2.air_store_id_8 43 | ; -------------------------------------------------------------------------------- /finance/recruit-restaurant-visitor-forecasting/tables/recruit-restaurant-visitor-forecasting_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE air_reserve (air_store_id VARCHAR, visit_datetime TIMESTAMP, reserve_datetime TIMESTAMP, reserve_visitors INT); 2 | CREATE TABLE air_store_info (air_store_id VARCHAR, air_genre_name VARCHAR, air_area_name VARCHAR, latitude DOUBLE PRECISION, longitude DOUBLE PRECISION); 3 | CREATE TABLE air_visit_data (air_store_id VARCHAR, visit_date TIMESTAMP, visitors INT); 4 | CREATE TABLE date_info (calendar_date TIMESTAMP, day_of_week VARCHAR, holiday_flg INT); 5 | CREATE TABLE hpg_reserve (hpg_store_id VARCHAR, visit_datetime TIMESTAMP, reserve_datetime TIMESTAMP, reserve_visitors INT); 6 | CREATE TABLE hpg_store_info (hpg_store_id VARCHAR, hpg_genre_name VARCHAR, hpg_area_name VARCHAR, latitude DOUBLE PRECISION, longitude DOUBLE PRECISION); 7 | CREATE TABLE 
store_id_relation (air_store_id VARCHAR, hpg_store_id VARCHAR); 8 | -------------------------------------------------------------------------------- /finance/restaurant-revenue-prediction/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/restaurant-revenue-prediction/train.csv 2 | -------------------------------------------------------------------------------- /finance/restaurant-revenue-prediction/queries/restaurant-revenue-prediction_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `Id` as Id_1, 6 | `Open_Date` as train_Open_Date_original_0, 7 | `Id` as train_Id_original_1, 8 | `revenue` as train_revenue_original_2, 9 | `City` as train_City_original_3, 10 | `City_Group` as train_City_Group_original_4, 11 | `P1` as train_P1_original_5, 12 | `P10` as train_P10_original_6, 13 | `P11` as train_P11_original_7, 14 | `P12` as train_P12_original_8, 15 | `P13` as train_P13_original_9, 16 | `P14` as train_P14_original_10, 17 | `P15` as train_P15_original_11, 18 | `P16` as train_P16_original_12, 19 | `P17` as train_P17_original_13, 20 | `P18` as train_P18_original_14, 21 | `P19` as train_P19_original_15, 22 | `P2` as train_P2_original_16, 23 | `P20` as train_P20_original_17, 24 | `P21` as train_P21_original_18, 25 | `P22` as train_P22_original_19, 26 | `P23` as train_P23_original_20, 27 | `P24` as train_P24_original_21, 28 | `P25` as train_P25_original_22, 29 | `P26` as train_P26_original_23, 30 | `P27` as train_P27_original_24, 31 | `P28` as train_P28_original_25, 32 | `P29` as train_P29_original_26, 33 | `P3` as train_P3_original_27, 34 | `P30` as train_P30_original_28, 35 | `P31` as train_P31_original_29, 36 | `P32` as train_P32_original_30, 37 | `P33` as train_P33_original_31, 38 | `P34` as train_P34_original_32, 39 | `P35` as train_P35_original_33, 40 | 
`P36` as train_P36_original_34, 41 | `P37` as train_P37_original_35, 42 | `P4` as train_P4_original_36, 43 | `P5` as train_P5_original_37, 44 | `P6` as train_P6_original_38, 45 | `P7` as train_P7_original_39, 46 | `P8` as train_P8_original_40, 47 | `P9` as train_P9_original_41, 48 | `Type` as train_Type_original_42, 49 | `P4` as train_P4_multiply_43, 50 | `P28` as train_P28_multiply_43, 51 | `P2` as train_P2_multiply_43, 52 | avg(`P13`) over train_City_Open_Date_0s_64d_100 as train_P13_window_avg_44, 53 | `P6` as train_P6_combine_45, 54 | `City` as train_City_combine_45, 55 | `P31` as train_P31_combine_46, 56 | `City` as train_City_combine_46, 57 | `P20` as train_P20_combine_47, 58 | `City` as train_City_combine_47, 59 | `P8` as train_P8_combine_48, 60 | `City` as train_City_combine_48, 61 | `P29` as train_P29_divide_49, 62 | `P26` as train_P26_divide_49, 63 | avg(`P13`) over train_City_Open_Date_0s_32d_100 as train_P13_window_avg_50, 64 | `P29` as train_P29_multiply_51, 65 | `P28` as train_P28_multiply_51, 66 | min(`P4`) over train_City_Open_Date_0s_64d_100 as train_P4_window_min_52, 67 | min(`P4`) over train_City_Open_Date_0s_32d_100 as train_P4_window_min_53 68 | from 69 | `train` 70 | window train_City_Open_Date_0s_64d_100 as (partition by `City` order by `Open_Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 71 | train_City_Open_Date_0s_32d_100 as (partition by `City` order by `Open_Date` rows_range between 32d open preceding and 0s preceding MAXSIZE 100); -------------------------------------------------------------------------------- /finance/restaurant-revenue-prediction/tables/restaurant-revenue-prediction_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE train (Id INT, Open_Date TIMESTAMP, City VARCHAR, City_Group VARCHAR, Type VARCHAR, P1 INT, P2 DOUBLE PRECISION, P3 DOUBLE PRECISION, P4 DOUBLE PRECISION, P5 INT, P6 INT, P7 INT, P8 INT, P9 INT, P10 INT, P11 INT, P12 
INT, P13 DOUBLE PRECISION, P14 INT, P15 INT, P16 INT, P17 INT, P18 INT, P19 INT, P20 INT, P21 INT, P22 INT, P23 INT, P24 INT, P25 INT, P26 DOUBLE PRECISION, P27 DOUBLE PRECISION, P28 DOUBLE PRECISION, P29 DOUBLE PRECISION, P30 INT, P31 INT, P32 INT, P33 INT, P34 INT, P35 INT, P36 INT, P37 INT, revenue DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /finance/robinhood-stock-data/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/robinhood-stock-data/HOOD.csv 2 | -------------------------------------------------------------------------------- /finance/robinhood-stock-data/queries/robinhood-stock-data_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `date` as date_1, 6 | `date` as HOOD_date_original_0, 7 | `close_last` as HOOD_close_last_original_2, 8 | `high` as HOOD_high_original_3, 9 | `low` as HOOD_low_original_4, 10 | `open` as HOOD_open_original_5 11 | from 12 | `HOOD` 13 | ; -------------------------------------------------------------------------------- /finance/robinhood-stock-data/tables/robinhood-stock-data_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE HOOD (date TIMESTAMP, close_last DOUBLE PRECISION, volume INT, open DOUBLE PRECISION, high DOUBLE PRECISION, low DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /finance/santander-customer-satisfaction/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/santander-customer-satisfaction/train.csv 2 | -------------------------------------------------------------------------------- 
/finance/sberbank-russian-housing-market/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/sberbank-russian-housing-market/macro.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/sberbank-russian-housing-market/train.csv 3 | -------------------------------------------------------------------------------- /finance/tiantian/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/finance/tiantian/data_train_less.csv 2 | -------------------------------------------------------------------------------- /health/big-data-derby-2022/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/health/big-data-derby-2022/nyra_race_table.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/health/big-data-derby-2022/nyra_start_table.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/health/big-data-derby-2022/nyra_tracking_table.csv 4 | -------------------------------------------------------------------------------- /health/big-data-derby-2022/tables/big-data-derby-2022_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE nyra_race_table (track_id VARCHAR, race_date TIMESTAMP, race_number INT, distance_id INT, course_type VARCHAR, track_condition VARCHAR, run_up_distance INT, race_type VARCHAR, purse INT, post_time INT); 2 | CREATE TABLE nyra_start_table (track_id VARCHAR, race_date TIMESTAMP, race_number INT, program_number VARCHAR, weight_carried INT, jockey VARCHAR, odds INT); 3 | CREATE TABLE nyra_tracking_table (track_id VARCHAR, race_date TIMESTAMP, race_number INT, program_number VARCHAR, trakus_index INT, latitude DOUBLE PRECISION, longitude DOUBLE PRECISION); 4 | 
-------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-2/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/health/covid19-global-forecasting-week-2/train.csv 2 | -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-2/queries/covid19-global-forecasting-week-2_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `Id` as Id_1, 6 | `Date` as train_Date_original_0, 7 | `Id` as train_Id_original_1, 8 | `ConfirmedCases` as train_ConfirmedCases_original_2, 9 | `Country_Region` as train_Country_Region_original_3, 10 | `Fatalities` as train_Fatalities_original_4, 11 | `Province_State` as train_Province_State_original_5, 12 | `Province_State` as train_Province_State_combine_6, 13 | `Country_Region` as train_Country_Region_combine_6, 14 | min(`Fatalities`) over train_Country_Region_Date_0s_2d_200 as train_Fatalities_window_min_7, 15 | log(`Fatalities`) as train_Fatalities_log_8, 16 | min(`Fatalities`) over train_Country_Region_Date_0s_7d_100 as train_Fatalities_window_min_9, 17 | avg(`Fatalities`) over train_Province_State_Date_0s_2d_200 as train_Fatalities_window_avg_10, 18 | avg(`Fatalities`) over train_Province_State_Date_0s_5d_200 as train_Fatalities_window_avg_11, 19 | sum(`Fatalities`) over train_Province_State_Date_0s_2d_100 as train_Fatalities_window_sum_12, 20 | max(`Fatalities`) over train_Province_State_Date_0_10_ as train_Fatalities_window_max_13, 21 | sum(`Fatalities`) over train_Province_State_Date_0s_7d_100 as train_Fatalities_window_sum_14, 22 | sum(`Fatalities`) over train_Country_Region_Date_0s_5d_200 as train_Fatalities_window_sum_15, 23 | max(`Fatalities`) over train_Country_Region_Date_0_10_ as 
train_Fatalities_window_max_16, 24 | max(`Fatalities`) over train_Country_Region_Date_0s_2d_200 as train_Fatalities_window_max_17, 25 | sum(`Fatalities`) over train_Country_Region_Date_0s_2d_200 as train_Fatalities_window_sum_18, 26 | min(`Fatalities`) over train_Province_State_Date_0s_32d_200 as train_Fatalities_window_min_19, 27 | avg(`Fatalities`) over train_Country_Region_Date_0s_32d_200 as train_Fatalities_window_avg_20, 28 | avg(`Fatalities`) over train_Country_Region_Date_0_10_ as train_Fatalities_window_avg_21, 29 | fz_top1_ratio(`Province_State`) over train_Country_Region_Date_0s_2d_100 as train_Province_State_window_top1_ratio_22 30 | from 31 | `train` 32 | window train_Country_Region_Date_0s_2d_200 as (partition by `Country_Region` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 33 | train_Country_Region_Date_0s_7d_100 as (partition by `Country_Region` order by `Date` rows_range between 7d open preceding and 0s preceding MAXSIZE 100), 34 | train_Province_State_Date_0s_2d_200 as (partition by `Province_State` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 35 | train_Province_State_Date_0s_5d_200 as (partition by `Province_State` order by `Date` rows_range between 5d open preceding and 0s preceding MAXSIZE 200), 36 | train_Province_State_Date_0s_2d_100 as (partition by `Province_State` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 37 | train_Province_State_Date_0_10_ as (partition by `Province_State` order by `Date` rows between 10 open preceding and 0 preceding), 38 | train_Province_State_Date_0s_7d_100 as (partition by `Province_State` order by `Date` rows_range between 7d open preceding and 0s preceding MAXSIZE 100), 39 | train_Country_Region_Date_0s_5d_200 as (partition by `Country_Region` order by `Date` rows_range between 5d open preceding and 0s preceding MAXSIZE 200), 40 | train_Country_Region_Date_0_10_ as (partition by `Country_Region` 
order by `Date` rows between 10 open preceding and 0 preceding), 41 | train_Province_State_Date_0s_32d_200 as (partition by `Province_State` order by `Date` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 42 | train_Country_Region_Date_0s_32d_200 as (partition by `Country_Region` order by `Date` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 43 | train_Country_Region_Date_0s_2d_100 as (partition by `Country_Region` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100); -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-2/tables/covid19-global-forecasting-week-2_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE train (Id INT, Province_State VARCHAR, Country_Region VARCHAR, Date TIMESTAMP, ConfirmedCases DOUBLE PRECISION, Fatalities DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-3/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/health/covid19-global-forecasting-week-3/train.csv 2 | -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-3/queries/covid19-global-forecasting-week-3_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `Id` as Id_1, 6 | `Date` as train_Date_original_0, 7 | `Id` as train_Id_original_1, 8 | `ConfirmedCases` as train_ConfirmedCases_original_2, 9 | `Country_Region` as train_Country_Region_original_3, 10 | `Fatalities` as train_Fatalities_original_4, 11 | `Province_State` as train_Province_State_original_5, 12 | log(`Fatalities`) as train_Fatalities_log_6, 13 | min(`Fatalities`) 
over train_Province_State_Date_0s_2d_200 as train_Fatalities_window_min_7, 14 | min(`Fatalities`) over train_Province_State_Date_0s_7d_200 as train_Fatalities_window_min_8, 15 | `Province_State` as train_Province_State_combine_9, 16 | `Country_Region` as train_Country_Region_combine_9, 17 | min(`Fatalities`) over train_Country_Region_Date_0s_2d_100 as train_Fatalities_window_min_10, 18 | min(`Fatalities`) over train_Country_Region_Date_0s_5d_200 as train_Fatalities_window_min_11, 19 | avg(`Fatalities`) over train_Province_State_Date_0s_2d_200 as train_Fatalities_window_avg_12, 20 | avg(`Fatalities`) over train_Province_State_Date_0s_5d_200 as train_Fatalities_window_avg_13, 21 | sum(`Fatalities`) over train_Province_State_Date_0s_7d_100 as train_Fatalities_window_sum_14, 22 | avg(`Fatalities`) over train_Country_Region_Date_0s_2d_100 as train_Fatalities_window_avg_15, 23 | max(`Fatalities`) over train_Country_Region_Date_0s_5d_200 as train_Fatalities_window_max_16, 24 | max(`Fatalities`) over train_Country_Region_Date_0s_14d_200 as train_Fatalities_window_max_17, 25 | avg(`Fatalities`) over train_Country_Region_Date_0_10_ as train_Fatalities_window_avg_18, 26 | fz_top1_ratio(`Province_State`) over train_Country_Region_Date_0_10_ as train_Province_State_window_top1_ratio_19, 27 | case when !isnull(at(`Country_Region`, 0)) over train_Province_State_Date_0s_14d_100 then count_where(`Country_Region`, `Country_Region` = at(`Country_Region`, 0)) over train_Province_State_Date_0s_14d_100 else null end as train_Country_Region_window_count_20, 28 | case when !isnull(at(`Country_Region`, 0)) over train_Province_State_Date_0s_64d_100 then count_where(`Country_Region`, `Country_Region` = at(`Country_Region`, 0)) over train_Province_State_Date_0s_64d_100 else null end as train_Country_Region_window_count_21, 29 | dayofweek(timestamp(`Date`)) as train_Date_dayofweek_22, 30 | case when 1 < dayofweek(timestamp(`Date`)) and dayofweek(timestamp(`Date`)) < 7 then 1 else 0 end as 
train_Date_isweekday_23 31 | from 32 | `train` 33 | window train_Province_State_Date_0s_2d_200 as (partition by `Province_State` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 34 | train_Province_State_Date_0s_7d_200 as (partition by `Province_State` order by `Date` rows_range between 7d open preceding and 0s preceding MAXSIZE 200), 35 | train_Country_Region_Date_0s_2d_100 as (partition by `Country_Region` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 36 | train_Country_Region_Date_0s_5d_200 as (partition by `Country_Region` order by `Date` rows_range between 5d open preceding and 0s preceding MAXSIZE 200), 37 | train_Province_State_Date_0s_5d_200 as (partition by `Province_State` order by `Date` rows_range between 5d open preceding and 0s preceding MAXSIZE 200), 38 | train_Province_State_Date_0s_7d_100 as (partition by `Province_State` order by `Date` rows_range between 7d open preceding and 0s preceding MAXSIZE 100), 39 | train_Country_Region_Date_0s_14d_200 as (partition by `Country_Region` order by `Date` rows_range between 14d open preceding and 0s preceding MAXSIZE 200), 40 | train_Country_Region_Date_0_10_ as (partition by `Country_Region` order by `Date` rows between 10 open preceding and 0 preceding), 41 | train_Province_State_Date_0s_14d_100 as (partition by `Province_State` order by `Date` rows_range between 14d open preceding and 0s preceding MAXSIZE 100), 42 | train_Province_State_Date_0s_64d_100 as (partition by `Province_State` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100); -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-3/tables/covid19-global-forecasting-week-3_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE train (Id INT, Province_State VARCHAR, Country_Region VARCHAR, Date TIMESTAMP, ConfirmedCases 
DOUBLE PRECISION, Fatalities DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-4/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/health/covid19-global-forecasting-week-4/train.csv 2 | -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-4/queries/covid19-global-forecasting-week-4_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `Id` as Id_1, 6 | `Date` as train_Date_original_0, 7 | `Id` as train_Id_original_1, 8 | `ConfirmedCases` as train_ConfirmedCases_original_2, 9 | `Country_Region` as train_Country_Region_original_3, 10 | `Fatalities` as train_Fatalities_original_4, 11 | `Province_State` as train_Province_State_original_5, 12 | log(`Fatalities`) as train_Fatalities_log_6, 13 | min(`Fatalities`) over train_Country_Region_Date_0s_2d_200 as train_Fatalities_window_min_7, 14 | min(`Fatalities`) over train_Province_State_Date_0s_2d_100 as train_Fatalities_window_min_8, 15 | sum(`Fatalities`) over train_Country_Region_Date_0s_2d_100 as train_Fatalities_window_sum_9, 16 | min(`Fatalities`) over train_Country_Region_Date_0s_7d_100 as train_Fatalities_window_min_10, 17 | min(`Fatalities`) over train_Province_State_Date_0s_5d_200 as train_Fatalities_window_min_11, 18 | sum(`Fatalities`) over train_Country_Region_Date_0s_7d_100 as train_Fatalities_window_sum_12, 19 | avg(`Fatalities`) over train_Country_Region_Date_0s_5d_100 as train_Fatalities_window_avg_13, 20 | avg(`Fatalities`) over train_Country_Region_Date_0s_2d_200 as train_Fatalities_window_avg_14, 21 | fz_top1_ratio(`Country_Region`) over train_Province_State_Date_0s_64d_100 as train_Country_Region_window_top1_ratio_15, 22 | max(`Fatalities`) 
over train_Country_Region_Date_0_10_ as train_Fatalities_window_max_16, 23 | case when !isnull(at(`Country_Region`, 0)) over train_Province_State_Date_0s_64d_200 then count_where(`Country_Region`, `Country_Region` = at(`Country_Region`, 0)) over train_Province_State_Date_0s_64d_200 else null end as train_Country_Region_window_count_17, 24 | case when !isnull(at(`Country_Region`, 0)) over train_Province_State_Date_0s_14d_200 then count_where(`Country_Region`, `Country_Region` = at(`Country_Region`, 0)) over train_Province_State_Date_0s_14d_200 else null end as train_Country_Region_window_count_18, 25 | case when !isnull(at(`Province_State`, 0)) over train_Country_Region_Date_0_10_ then count_where(`Province_State`, `Province_State` = at(`Province_State`, 0)) over train_Country_Region_Date_0_10_ else null end as train_Province_State_window_count_19, 26 | case when !isnull(at(`Province_State`, 0)) over train_Country_Region_Date_0s_2d_200 then count_where(`Province_State`, `Province_State` = at(`Province_State`, 0)) over train_Country_Region_Date_0s_2d_200 else null end as train_Province_State_window_count_20, 27 | fz_top1_ratio(`Province_State`) over train_Country_Region_Date_0_10_ as train_Province_State_window_top1_ratio_21, 28 | distinct_count(`Province_State`) over train_Country_Region_Date_0_10_ as train_Province_State_window_unique_count_22, 29 | max(`Fatalities`) over train_Country_Region_Date_0s_32d_200 as train_Fatalities_window_max_23 30 | from 31 | `train` 32 | window train_Country_Region_Date_0s_2d_200 as (partition by `Country_Region` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 33 | train_Province_State_Date_0s_2d_100 as (partition by `Province_State` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 34 | train_Country_Region_Date_0s_2d_100 as (partition by `Country_Region` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 35 | 
train_Country_Region_Date_0s_7d_100 as (partition by `Country_Region` order by `Date` rows_range between 7d open preceding and 0s preceding MAXSIZE 100), 36 | train_Province_State_Date_0s_5d_200 as (partition by `Province_State` order by `Date` rows_range between 5d open preceding and 0s preceding MAXSIZE 200), 37 | train_Country_Region_Date_0s_5d_100 as (partition by `Country_Region` order by `Date` rows_range between 5d open preceding and 0s preceding MAXSIZE 100), 38 | train_Province_State_Date_0s_64d_100 as (partition by `Province_State` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 39 | train_Country_Region_Date_0_10_ as (partition by `Country_Region` order by `Date` rows between 10 open preceding and 0 preceding), 40 | train_Province_State_Date_0s_64d_200 as (partition by `Province_State` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 41 | train_Province_State_Date_0s_14d_200 as (partition by `Province_State` order by `Date` rows_range between 14d open preceding and 0s preceding MAXSIZE 200), 42 | train_Country_Region_Date_0s_32d_200 as (partition by `Country_Region` order by `Date` rows_range between 32d open preceding and 0s preceding MAXSIZE 200); -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-4/tables/covid19-global-forecasting-week-4_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE train (Id INT, Province_State VARCHAR, Country_Region VARCHAR, Date TIMESTAMP, ConfirmedCases DOUBLE PRECISION, Fatalities DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-5/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/health/covid19-global-forecasting-week-5/train.csv 2 | 
-------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-5/queries/covid19-global-forecasting-week-5_query.sql: -------------------------------------------------------------------------------- /* covid19-week-5 feature-engineering query (OpenMLDB dialect). Emits pass-through columns from train plus windowed features: fz_top1_ratio (share of the most frequent value in the window), distinct_count, and count_where(x, x = at(x, 0)) guarded by an isnull check on the current row's value. Windows are either time-range (rows_range between Nd/Ns open preceding and 0s preceding, row count capped by MAXSIZE) or fixed 10-row windows; each window name encodes table_partitionColumn_orderColumn_bounds_maxsize. */ 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `Id` as Id_1, 6 | `Date` as train_Date_original_0, 7 | `Id` as train_Id_original_1, 8 | `TargetValue` as train_TargetValue_original_2, 9 | `Country_Region` as train_Country_Region_original_3, 10 | `County` as train_County_original_4, 11 | `Population` as train_Population_original_5, 12 | `Province_State` as train_Province_State_original_6, 13 | `Target` as train_Target_original_7, 14 | `Weight` as train_Weight_original_8, 15 | fz_top1_ratio(`Target`) over train_Province_State_Date_0s_64d_100 as train_Target_window_top1_ratio_9, 16 | dayofweek(timestamp(`Date`)) as train_Date_dayofweek_10, 17 | case when !isnull(at(`Target`, 0)) over train_Province_State_Date_0s_7d_100 then count_where(`Target`, `Target` = at(`Target`, 0)) over train_Province_State_Date_0s_7d_100 else null end as train_Target_window_count_11, 18 | case when !isnull(at(`Target`, 0)) over train_Province_State_Date_0s_5d_100 then count_where(`Target`, `Target` = at(`Target`, 0)) over train_Province_State_Date_0s_5d_100 else null end as train_Target_window_count_12, 19 | fz_top1_ratio(`Target`) over train_Province_State_Date_0s_5d_100 as train_Target_window_top1_ratio_13, 20 | distinct_count(`County`) over train_Target_Date_0_10_ as train_County_window_unique_count_14, 21 | fz_top1_ratio(`County`) over train_Target_Date_0_10_ as train_County_window_top1_ratio_15, 22 | case when !isnull(at(`Target`, 0)) over train_County_Date_0_10_ then count_where(`Target`, `Target` = at(`Target`, 0)) over train_County_Date_0_10_ else null end as train_Target_window_count_16, 23 | fz_top1_ratio(`County`) over train_Target_Date_0s_64d_100 as train_County_window_top1_ratio_17, 24 | case when 
!isnull(at(`Target`, 0)) over train_Population_Date_0s_2d_200 then count_where(`Target`, `Target` = at(`Target`, 0)) over train_Population_Date_0s_2d_200 else null end as train_Target_window_count_18, 25 | distinct_count(`Population`) over train_Target_Date_0s_64d_100 as train_Population_window_unique_count_19, 26 | case when !isnull(at(`Population`, 0)) over train_Province_State_Date_0_10_ then count_where(`Population`, `Population` = at(`Population`, 0)) over train_Province_State_Date_0_10_ else null end as train_Population_window_count_20, 27 | case when !isnull(at(`Population`, 0)) over train_Province_State_Date_0s_2d_100 then count_where(`Population`, `Population` = at(`Population`, 0)) over train_Province_State_Date_0s_2d_100 else null end as train_Population_window_count_21, 28 | distinct_count(`Population`) over train_Province_State_Date_0_10_ as train_Population_window_unique_count_22, 29 | fz_top1_ratio(`Province_State`) over train_Target_Date_0_10_ as train_Province_State_window_top1_ratio_23, 30 | fz_top1_ratio(`Province_State`) over train_Target_Date_0s_64d_100 as train_Province_State_window_top1_ratio_24, 31 | case when !isnull(at(`Population`, 0)) over train_Country_Region_Date_0s_2d_100 then count_where(`Population`, `Population` = at(`Population`, 0)) over train_Country_Region_Date_0s_2d_100 else null end as train_Population_window_count_25, 32 | min(`Weight`) over train_Target_Date_0s_64d_200 as train_Weight_window_min_26 33 | from 34 | `train` 35 | /* each named window below must match the names referenced in the over clauses above; do not rename independently */ window train_Province_State_Date_0s_64d_100 as (partition by `Province_State` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 36 | train_Province_State_Date_0s_7d_100 as (partition by `Province_State` order by `Date` rows_range between 7d open preceding and 0s preceding MAXSIZE 100), 37 | train_Province_State_Date_0s_5d_100 as (partition by `Province_State` order by `Date` rows_range between 5d open preceding and 0s preceding MAXSIZE 100), 38 | 
train_Target_Date_0_10_ as (partition by `Target` order by `Date` rows between 10 open preceding and 0 preceding), 39 | train_County_Date_0_10_ as (partition by `County` order by `Date` rows between 10 open preceding and 0 preceding), 40 | train_Target_Date_0s_64d_100 as (partition by `Target` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 41 | train_Population_Date_0s_2d_200 as (partition by `Population` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 42 | train_Province_State_Date_0_10_ as (partition by `Province_State` order by `Date` rows between 10 open preceding and 0 preceding), 43 | train_Province_State_Date_0s_2d_100 as (partition by `Province_State` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 44 | train_Country_Region_Date_0s_2d_100 as (partition by `Country_Region` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 45 | train_Target_Date_0s_64d_200 as (partition by `Target` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200); -------------------------------------------------------------------------------- /health/covid19-global-forecasting-week-5/tables/covid19-global-forecasting-week-5_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE train (Id INT, County VARCHAR, Province_State VARCHAR, Country_Region VARCHAR, Population INT, Weight DOUBLE PRECISION, Date TIMESTAMP, Target VARCHAR, TargetValue INT); 2 | -------------------------------------------------------------------------------- /health/predict-west-nile-virus/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/health/predict-west-nile-virus/spray.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/health/predict-west-nile-virus/train.csv 3 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/health/predict-west-nile-virus/weather.csv 4 | -------------------------------------------------------------------------------- /health/predict-west-nile-virus/queries/predict-west-nile-virus_query.sql: -------------------------------------------------------------------------------- /* predict-west-nile-virus: enriches train rows with same-Date spray and weather columns. out0 is a pass-through projection of train; out1 re-reads train and last-joins spray and weather on Date (OpenMLDB last join keeps a single matching right-side row per left row); the two halves are stitched back together by last join on the Id key. NOTE(review): the query selects `Id` from train, but the train schema below declares no Id column -- verify the actual CSV/table layout. */ 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `Id` as Id_1, 7 | `Date` as train_Date_original_0, 8 | `Id` as train_Id_original_1, 9 | `WnvPresent` as train_WnvPresent_original_2, 10 | `Address` as train_Address_original_23, 11 | `AddressAccuracy` as train_AddressAccuracy_original_24, 12 | `Block` as train_Block_original_25, 13 | `Latitude` as train_Latitude_original_26, 14 | `Longitude` as train_Longitude_original_27, 15 | `NumMosquitos` as train_NumMosquitos_original_28, 16 | `Species` as train_Species_original_29, 17 | `Street` as train_Street_original_30, 18 | `Trap` as train_Trap_original_31 19 | from 20 | `train` 21 | ) 22 | as out0 23 | last join 24 | ( 25 | select 26 | `train`.`Id` as Id_4, 27 | `spray_Date`.`Latitude` as spray_Latitude_multi_direct_3, 28 | `weather_Date`.`AvgSpeed` as weather_AvgSpeed_multi_direct_4, 29 | `weather_Date`.`CodeSum` as weather_CodeSum_multi_direct_5, 30 | `weather_Date`.`Cool` as weather_Cool_multi_direct_6, 31 | `weather_Date`.`Depart` as weather_Depart_multi_direct_7, 32 | `weather_Date`.`Depth` as weather_Depth_multi_direct_8, 33 | `weather_Date`.`DewPoint` as weather_DewPoint_multi_direct_9, 34 | `weather_Date`.`Heat` as weather_Heat_multi_direct_10, 35 | `weather_Date`.`PrecipTotal` as weather_PrecipTotal_multi_direct_11, 36 | `weather_Date`.`ResultDir` as weather_ResultDir_multi_direct_12, 37 | `weather_Date`.`ResultSpeed` as weather_ResultSpeed_multi_direct_13, 38 | `weather_Date`.`SeaLevel` as weather_SeaLevel_multi_direct_14, 39 | `weather_Date`.`SnowFall` as weather_SnowFall_multi_direct_15, 40 | `weather_Date`.`StnPressure` as 
weather_StnPressure_multi_direct_16, 41 | `weather_Date`.`Sunrise` as weather_Sunrise_multi_direct_17, 42 | `weather_Date`.`Sunset` as weather_Sunset_multi_direct_18, 43 | `weather_Date`.`Tavg` as weather_Tavg_multi_direct_19, 44 | `weather_Date`.`Tmax` as weather_Tmax_multi_direct_20, 45 | `weather_Date`.`Tmin` as weather_Tmin_multi_direct_21, 46 | `weather_Date`.`WetBulb` as weather_WetBulb_multi_direct_22 47 | from 48 | `train` 49 | last join `spray` as `spray_Date` on `train`.`Date` = `spray_Date`.`Date` 50 | last join `weather` as `weather_Date` on `train`.`Date` = `weather_Date`.`Date`) 51 | as out1 52 | on out0.Id_1 = out1.Id_4 53 | ; -------------------------------------------------------------------------------- /health/predict-west-nile-virus/tables/predict-west-nile-virus_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE spray (Date TIMESTAMP, Time VARCHAR, Latitude DOUBLE PRECISION, Longitude DOUBLE PRECISION); 2 | CREATE TABLE train (Date TIMESTAMP, Address VARCHAR, Species VARCHAR, Block INT, Street VARCHAR, Trap VARCHAR, AddressNumberAndStreet VARCHAR, Latitude DOUBLE PRECISION, Longitude DOUBLE PRECISION, AddressAccuracy INT, NumMosquitos INT, WnvPresent INT); 3 | CREATE TABLE weather (Station INT, Date TIMESTAMP, Tmax INT, Tmin INT, Tavg VARCHAR, Depart VARCHAR, DewPoint INT, WetBulb VARCHAR, Heat VARCHAR, Cool VARCHAR, Sunrise VARCHAR, Sunset VARCHAR, CodeSum VARCHAR, Depth VARCHAR, Water1 VARCHAR, SnowFall VARCHAR, PrecipTotal VARCHAR, StnPressure VARCHAR, SeaLevel VARCHAR, ResultSpeed DOUBLE PRECISION, ResultDir INT, AvgSpeed VARCHAR); 4 | -------------------------------------------------------------------------------- /media/detecting-insults-in-social-commentary/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/media/detecting-insults-in-social-commentary/impermium_verification_labels.csv 2 | 
-------------------------------------------------------------------------------- /media/detecting-insults-in-social-commentary/queries/detecting-insults-in-social-commentary_query_classification.sql: -------------------------------------------------------------------------------- /* detecting-insults classification variant (OpenMLDB dialect): pass-through columns from impermium_verification_labels plus windowed Comment/Usage statistics (distinct_count, fz_top1_ratio, dayofweek) over Usage- and Comment-partitioned rows_range windows ordered by Date. NOTE(review): Date is declared VARCHAR in the schema at the end of this span, hence the explicit timestamp() casts. */ 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `id` as id_1, 6 | `Date` as impermium_verification_labels_Date_original_0, 7 | `id` as impermium_verification_labels_id_original_1, 8 | `Insult` as impermium_verification_labels_Insult_original_2, 9 | `Comment` as impermium_verification_labels_Comment_original_3, 10 | `Usage` as impermium_verification_labels_Usage_original_4, 11 | distinct_count(`Comment`) over impermium_verification_labels_Usage_Date_0s_5h_200 as impermium_verification_labels_Comment_window_unique_count_5, 12 | fz_top1_ratio(`Comment`) over impermium_verification_labels_Usage_Date_0s_1h_100 as impermium_verification_labels_Comment_window_top1_ratio_6, 13 | distinct_count(`Comment`) over impermium_verification_labels_Usage_Date_0s_2h_200 as impermium_verification_labels_Comment_window_unique_count_7, 14 | `Usage` as impermium_verification_labels_Usage_combine_8, 15 | `Comment` as impermium_verification_labels_Comment_combine_8, 16 | fz_top1_ratio(`Comment`) over impermium_verification_labels_Usage_Date_0s_2d_100 as impermium_verification_labels_Comment_window_top1_ratio_9, 17 | fz_top1_ratio(`Usage`) over impermium_verification_labels_Comment_Date_0s_64d_200 as impermium_verification_labels_Usage_window_top1_ratio_10, 18 | fz_top1_ratio(`Usage`) over impermium_verification_labels_Comment_Date_0s_7d_200 as impermium_verification_labels_Usage_window_top1_ratio_11, 19 | dayofweek(timestamp(`Date`)) as impermium_verification_labels_Date_dayofweek_12, 20 | distinct_count(`Usage`) over impermium_verification_labels_Comment_Date_0s_64d_200 as impermium_verification_labels_Usage_window_unique_count_13 21 | from 22 | `impermium_verification_labels` 23 | 
window impermium_verification_labels_Usage_Date_0s_5h_200 as (partition by `Usage` order by `Date` rows_range between 5h open preceding and 0s preceding MAXSIZE 200), 24 | impermium_verification_labels_Usage_Date_0s_1h_100 as (partition by `Usage` order by `Date` rows_range between 1h open preceding and 0s preceding MAXSIZE 100), 25 | impermium_verification_labels_Usage_Date_0s_2h_200 as (partition by `Usage` order by `Date` rows_range between 2h open preceding and 0s preceding MAXSIZE 200), 26 | impermium_verification_labels_Usage_Date_0s_2d_100 as (partition by `Usage` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 27 | impermium_verification_labels_Comment_Date_0s_64d_200 as (partition by `Comment` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 28 | impermium_verification_labels_Comment_Date_0s_7d_200 as (partition by `Comment` order by `Date` rows_range between 7d open preceding and 0s preceding MAXSIZE 200); -------------------------------------------------------------------------------- /media/detecting-insults-in-social-commentary/queries/detecting-insults-in-social-commentary_query_regression.sql: -------------------------------------------------------------------------------- /* regression variant of the classification query above: same source table and feature style, different window sizes, plus isweekday/hourofday date features and count_where features guarded by isnull on the current row's value. */ 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `id` as id_1, 6 | `Date` as impermium_verification_labels_Date_original_0, 7 | `id` as impermium_verification_labels_id_original_1, 8 | `Insult` as impermium_verification_labels_Insult_original_2, 9 | `Comment` as impermium_verification_labels_Comment_original_3, 10 | `Usage` as impermium_verification_labels_Usage_original_4, 11 | distinct_count(`Comment`) over impermium_verification_labels_Usage_Date_0s_10h_200 as impermium_verification_labels_Comment_window_unique_count_5, 12 | fz_top1_ratio(`Comment`) over impermium_verification_labels_Usage_Date_0s_10h_200 as impermium_verification_labels_Comment_window_top1_ratio_6, 13 | 
fz_top1_ratio(`Comment`) over impermium_verification_labels_Usage_Date_0s_2d_200 as impermium_verification_labels_Comment_window_top1_ratio_7, 14 | distinct_count(`Comment`) over impermium_verification_labels_Usage_Date_0s_2d_200 as impermium_verification_labels_Comment_window_unique_count_8, 15 | `Comment` as impermium_verification_labels_Comment_combine_9, 16 | `Usage` as impermium_verification_labels_Usage_combine_9, 17 | case when !isnull(at(`Comment`, 0)) over impermium_verification_labels_Usage_Date_0s_32d_100 then count_where(`Comment`, `Comment` = at(`Comment`, 0)) over impermium_verification_labels_Usage_Date_0s_32d_100 else null end as impermium_verification_labels_Comment_window_count_10, 18 | dayofweek(timestamp(`Date`)) as impermium_verification_labels_Date_dayofweek_11, 19 | case when 1 < dayofweek(timestamp(`Date`)) and dayofweek(timestamp(`Date`)) < 7 then 1 else 0 end as impermium_verification_labels_Date_isweekday_12, 20 | hour(timestamp(`Date`)) as impermium_verification_labels_Date_hourofday_13, 21 | case when !isnull(at(`Usage`, 0)) over impermium_verification_labels_Comment_Date_0s_32d_200 then count_where(`Usage`, `Usage` = at(`Usage`, 0)) over impermium_verification_labels_Comment_Date_0s_32d_200 else null end as impermium_verification_labels_Usage_window_count_14, 22 | distinct_count(`Usage`) over impermium_verification_labels_Comment_Date_0s_5d_100 as impermium_verification_labels_Usage_window_unique_count_15, 23 | distinct_count(`Usage`) over impermium_verification_labels_Comment_Date_0s_64d_200 as impermium_verification_labels_Usage_window_unique_count_16, 24 | case when !isnull(at(`Usage`, 0)) over impermium_verification_labels_Comment_Date_0s_10h_100 then count_where(`Usage`, `Usage` = at(`Usage`, 0)) over impermium_verification_labels_Comment_Date_0s_10h_100 else null end as impermium_verification_labels_Usage_window_count_17, 25 | fz_top1_ratio(`Usage`) over impermium_verification_labels_Comment_Date_0s_32d_200 as 
impermium_verification_labels_Usage_window_top1_ratio_18, 26 | fz_top1_ratio(`Usage`) over impermium_verification_labels_Comment_Date_0s_5d_100 as impermium_verification_labels_Usage_window_top1_ratio_19 27 | from 28 | `impermium_verification_labels` 29 | window impermium_verification_labels_Usage_Date_0s_10h_200 as (partition by `Usage` order by `Date` rows_range between 10h open preceding and 0s preceding MAXSIZE 200), 30 | impermium_verification_labels_Usage_Date_0s_2d_200 as (partition by `Usage` order by `Date` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 31 | impermium_verification_labels_Usage_Date_0s_32d_100 as (partition by `Usage` order by `Date` rows_range between 32d open preceding and 0s preceding MAXSIZE 100), 32 | impermium_verification_labels_Comment_Date_0s_32d_200 as (partition by `Comment` order by `Date` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 33 | impermium_verification_labels_Comment_Date_0s_5d_100 as (partition by `Comment` order by `Date` rows_range between 5d open preceding and 0s preceding MAXSIZE 100), 34 | impermium_verification_labels_Comment_Date_0s_64d_200 as (partition by `Comment` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 35 | impermium_verification_labels_Comment_Date_0s_10h_100 as (partition by `Comment` order by `Date` rows_range between 10h open preceding and 0s preceding MAXSIZE 100); -------------------------------------------------------------------------------- /media/detecting-insults-in-social-commentary/tables/detecting-insults-in-social-commentary_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE impermium_verification_labels 2 | ( 3 | id INT, 4 | Insult INT, 5 | Date VARCHAR, 6 | Comment VARCHAR, 7 | Usage VARCHAR 8 | ); 9 | -------------------------------------------------------------------------------- /media/spotify-app-reviews-2022/data_link.txt: 
-------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/media/spotify-app-reviews-2022/reviews.csv 2 | -------------------------------------------------------------------------------- /media/spotify-app-reviews-2022/queries/spotify-app-reviews-2022_query.sql: -------------------------------------------------------------------------------- /* spotify-app-reviews-2022 (OpenMLDB dialect): pass-through review columns plus windowed Rating/Review statistics (count_where of the current value guarded by isnull, distinct_count, fz_top1_ratio) over Reply-, Review- and Rating-partitioned windows ordered by Time_submitted -- both time-range windows with MAXSIZE caps and fixed 10-row windows. */ 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `id` as id_1, 6 | `Time_submitted` as reviews_Time_submitted_original_0, 7 | `id` as reviews_id_original_1, 8 | `Total_thumbsup` as reviews_Total_thumbsup_original_2, 9 | `Rating` as reviews_Rating_original_3, 10 | `Reply` as reviews_Reply_original_4, 11 | `Review` as reviews_Review_original_5, 12 | `Rating` as reviews_Rating_combine_6, 13 | `Reply` as reviews_Reply_combine_6, 14 | case when !isnull(at(`Rating`, 0)) over reviews_Reply_Time_submitted_0s_2d_200 then count_where(`Rating`, `Rating` = at(`Rating`, 0)) over reviews_Reply_Time_submitted_0s_2d_200 else null end as reviews_Rating_window_count_7, 15 | case when !isnull(at(`Rating`, 0)) over reviews_Reply_Time_submitted_0s_7d_200 then count_where(`Rating`, `Rating` = at(`Rating`, 0)) over reviews_Reply_Time_submitted_0s_7d_200 else null end as reviews_Rating_window_count_8, 16 | distinct_count(`Rating`) over reviews_Reply_Time_submitted_0_10_ as reviews_Rating_window_unique_count_9, 17 | distinct_count(`Review`) over reviews_Reply_Time_submitted_0s_64d_200 as reviews_Review_window_unique_count_10, 18 | distinct_count(`Review`) over reviews_Reply_Time_submitted_0s_7d_200 as reviews_Review_window_unique_count_11, 19 | distinct_count(`Rating`) over reviews_Reply_Time_submitted_0s_10h_200 as reviews_Rating_window_unique_count_12, 20 | fz_top1_ratio(`Review`) over reviews_Reply_Time_submitted_0s_64d_200 as reviews_Review_window_top1_ratio_13, 21 | fz_top1_ratio(`Review`) over reviews_Reply_Time_submitted_0s_7d_200 as 
reviews_Review_window_top1_ratio_14, 22 | case when !isnull(at(`Rating`, 0)) over reviews_Review_Time_submitted_0s_64d_100 then count_where(`Rating`, `Rating` = at(`Rating`, 0)) over reviews_Review_Time_submitted_0s_64d_100 else null end as reviews_Rating_window_count_15, 23 | case when !isnull(at(`Rating`, 0)) over reviews_Review_Time_submitted_0s_14d_200 then count_where(`Rating`, `Rating` = at(`Rating`, 0)) over reviews_Review_Time_submitted_0s_14d_200 else null end as reviews_Rating_window_count_16, 24 | distinct_count(`Rating`) over reviews_Review_Time_submitted_0_10_ as reviews_Rating_window_unique_count_17, 25 | distinct_count(`Rating`) over reviews_Review_Time_submitted_0s_2d_100 as reviews_Rating_window_unique_count_18, 26 | case when !isnull(at(`Review`, 0)) over reviews_Reply_Time_submitted_0s_64d_100 then count_where(`Review`, `Review` = at(`Review`, 0)) over reviews_Reply_Time_submitted_0s_64d_100 else null end as reviews_Review_window_count_19, 27 | case when !isnull(at(`Review`, 0)) over reviews_Reply_Time_submitted_0s_5h_100 then count_where(`Review`, `Review` = at(`Review`, 0)) over reviews_Reply_Time_submitted_0s_5h_100 else null end as reviews_Review_window_count_20, 28 | distinct_count(`Review`) over reviews_Rating_Time_submitted_0s_64d_100 as reviews_Review_window_unique_count_21, 29 | fz_top1_ratio(`Review`) over reviews_Rating_Time_submitted_0_10_ as reviews_Review_window_top1_ratio_22, 30 | distinct_count(`Review`) over reviews_Rating_Time_submitted_0_10_ as reviews_Review_window_unique_count_23 31 | from 32 | `reviews` 33 | window reviews_Reply_Time_submitted_0s_2d_200 as (partition by `Reply` order by `Time_submitted` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 34 | reviews_Reply_Time_submitted_0s_7d_200 as (partition by `Reply` order by `Time_submitted` rows_range between 7d open preceding and 0s preceding MAXSIZE 200), 35 | reviews_Reply_Time_submitted_0_10_ as (partition by `Reply` order by `Time_submitted` rows 
between 10 open preceding and 0 preceding), 36 | reviews_Reply_Time_submitted_0s_64d_200 as (partition by `Reply` order by `Time_submitted` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 37 | reviews_Reply_Time_submitted_0s_10h_200 as (partition by `Reply` order by `Time_submitted` rows_range between 10h open preceding and 0s preceding MAXSIZE 200), 38 | reviews_Review_Time_submitted_0s_64d_100 as (partition by `Review` order by `Time_submitted` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 39 | reviews_Review_Time_submitted_0s_14d_200 as (partition by `Review` order by `Time_submitted` rows_range between 14d open preceding and 0s preceding MAXSIZE 200), 40 | reviews_Review_Time_submitted_0_10_ as (partition by `Review` order by `Time_submitted` rows between 10 open preceding and 0 preceding), 41 | reviews_Review_Time_submitted_0s_2d_100 as (partition by `Review` order by `Time_submitted` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 42 | reviews_Reply_Time_submitted_0s_64d_100 as (partition by `Reply` order by `Time_submitted` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 43 | reviews_Reply_Time_submitted_0s_5h_100 as (partition by `Reply` order by `Time_submitted` rows_range between 5h open preceding and 0s preceding MAXSIZE 100), 44 | reviews_Rating_Time_submitted_0s_64d_100 as (partition by `Rating` order by `Time_submitted` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 45 | reviews_Rating_Time_submitted_0_10_ as (partition by `Rating` order by `Time_submitted` rows between 10 open preceding and 0 preceding); -------------------------------------------------------------------------------- /media/spotify-app-reviews-2022/tables/spotify-app-reviews-2022_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE reviews (id INT, Time_submitted TIMESTAMP, Review VARCHAR, Rating INT, Total_thumbsup INT, 
Reply VARCHAR); 2 | -------------------------------------------------------------------------------- /media/twitter-threads/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/media/twitter-threads/fifteen_twenty.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/media/twitter-threads/five_ten.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/media/twitter-threads/ten_fifteen.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/media/twitter-threads/twenty_twentyfive.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/media/twitter-threads/twentyfive_thirty.csv 6 | -------------------------------------------------------------------------------- /media/twitter-threads/queries/twitter-threads_query.sql: -------------------------------------------------------------------------------- /* twitter-threads (OpenMLDB dialect): windowed counts and top1 ratios over the five_ten table only -- windows partitioned by replies, retweets and thread_number, ordered by timestamp. NOTE(review): timestamp is declared VARCHAR in the schema at the end of this span, yet rows_range windows use time units (1h..64d) over it -- verify the engine's handling of a VARCHAR order key. */ 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `id` as id_1, 6 | `timestamp` as five_ten_timestamp_original_0, 7 | `id` as five_ten_id_original_1, 8 | `likes` as five_ten_likes_original_2, 9 | `replies` as five_ten_replies_original_3, 10 | `retweets` as five_ten_retweets_original_4, 11 | `text` as five_ten_text_original_5, 12 | `thread_number` as five_ten_thread_number_original_6, 13 | case when !isnull(at(`retweets`, 0)) over five_ten_replies_timestamp_0s_64d_200 then count_where(`retweets`, `retweets` = at(`retweets`, 0)) over five_ten_replies_timestamp_0s_64d_200 else null end as five_ten_retweets_window_count_7, 14 | case when !isnull(at(`retweets`, 0)) over five_ten_replies_timestamp_0s_14d_200 then count_where(`retweets`, `retweets` = at(`retweets`, 0)) over five_ten_replies_timestamp_0s_14d_200 else null end as five_ten_retweets_window_count_8, 15 | case when !isnull(at(`replies`, 0)) over five_ten_retweets_timestamp_0s_32d_200 then count_where(`replies`, `replies` = at(`replies`, 0)) over five_ten_retweets_timestamp_0s_32d_200 else null end as 
five_ten_replies_window_count_9, 16 | case when !isnull(at(`replies`, 0)) over five_ten_retweets_timestamp_0_10_ then count_where(`replies`, `replies` = at(`replies`, 0)) over five_ten_retweets_timestamp_0_10_ else null end as five_ten_replies_window_count_10, 17 | case when !isnull(at(`replies`, 0)) over five_ten_thread_number_timestamp_0s_1h_200 then count_where(`replies`, `replies` = at(`replies`, 0)) over five_ten_thread_number_timestamp_0s_1h_200 else null end as five_ten_replies_window_count_11, 18 | case when !isnull(at(`replies`, 0)) over five_ten_thread_number_timestamp_0s_2h_100 then count_where(`replies`, `replies` = at(`replies`, 0)) over five_ten_thread_number_timestamp_0s_2h_100 else null end as five_ten_replies_window_count_12, 19 | fz_top1_ratio(`text`) over five_ten_retweets_timestamp_0s_64d_200 as five_ten_text_window_top1_ratio_13, 20 | fz_top1_ratio(`text`) over five_ten_retweets_timestamp_0s_14d_200 as five_ten_text_window_top1_ratio_14, 21 | case when !isnull(at(`retweets`, 0)) over five_ten_thread_number_timestamp_0s_2h_100 then count_where(`retweets`, `retweets` = at(`retweets`, 0)) over five_ten_thread_number_timestamp_0s_2h_100 else null end as five_ten_retweets_window_count_15, 22 | fz_top1_ratio(`retweets`) over five_ten_replies_timestamp_0_10_ as five_ten_retweets_window_top1_ratio_16, 23 | fz_top1_ratio(`replies`) over five_ten_retweets_timestamp_0s_32d_200 as five_ten_replies_window_top1_ratio_17, 24 | fz_top1_ratio(`text`) over five_ten_replies_timestamp_0_10_ as five_ten_text_window_top1_ratio_18, 25 | fz_top1_ratio(`replies`) over five_ten_thread_number_timestamp_0s_5h_100 as five_ten_replies_window_top1_ratio_19, 26 | distinct_count(`id`) over five_ten_replies_timestamp_0_10_ as five_ten_id_window_unique_count_20, 27 | distinct_count(`thread_number`) over five_ten_replies_timestamp_0s_14d_200 as five_ten_thread_number_window_unique_count_21, 28 | case when !isnull(at(`id`, 0)) over five_ten_retweets_timestamp_0s_32d_100 then 
count_where(`id`, `id` = at(`id`, 0)) over five_ten_retweets_timestamp_0s_32d_100 else null end as five_ten_id_window_count_22, 29 | fz_top1_ratio(`thread_number`) over five_ten_retweets_timestamp_0s_32d_200 as five_ten_thread_number_window_top1_ratio_23, 30 | fz_top1_ratio(`replies`) over five_ten_thread_number_timestamp_0s_10h_200 as five_ten_replies_window_top1_ratio_24 31 | from 32 | `five_ten` 33 | window five_ten_replies_timestamp_0s_64d_200 as (partition by `replies` order by `timestamp` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 34 | five_ten_replies_timestamp_0s_14d_200 as (partition by `replies` order by `timestamp` rows_range between 14d open preceding and 0s preceding MAXSIZE 200), 35 | five_ten_retweets_timestamp_0s_32d_200 as (partition by `retweets` order by `timestamp` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 36 | five_ten_retweets_timestamp_0_10_ as (partition by `retweets` order by `timestamp` rows between 10 open preceding and 0 preceding), 37 | five_ten_thread_number_timestamp_0s_1h_200 as (partition by `thread_number` order by `timestamp` rows_range between 1h open preceding and 0s preceding MAXSIZE 200), 38 | five_ten_thread_number_timestamp_0s_2h_100 as (partition by `thread_number` order by `timestamp` rows_range between 2h open preceding and 0s preceding MAXSIZE 100), 39 | five_ten_retweets_timestamp_0s_64d_200 as (partition by `retweets` order by `timestamp` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 40 | five_ten_retweets_timestamp_0s_14d_200 as (partition by `retweets` order by `timestamp` rows_range between 14d open preceding and 0s preceding MAXSIZE 200), 41 | five_ten_replies_timestamp_0_10_ as (partition by `replies` order by `timestamp` rows between 10 open preceding and 0 preceding), 42 | five_ten_thread_number_timestamp_0s_5h_100 as (partition by `thread_number` order by `timestamp` rows_range between 5h open preceding and 0s preceding MAXSIZE 100), 43 
| five_ten_retweets_timestamp_0s_32d_100 as (partition by `retweets` order by `timestamp` rows_range between 32d open preceding and 0s preceding MAXSIZE 100), 44 | five_ten_thread_number_timestamp_0s_10h_200 as (partition by `thread_number` order by `timestamp` rows_range between 10h open preceding and 0s preceding MAXSIZE 200); -------------------------------------------------------------------------------- /media/twitter-threads/tables/twitter-threads_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE fifteen_twenty 2 | ( 3 | id VARCHAR, 4 | thread_number VARCHAR, 5 | timestamp VARCHAR, 6 | text VARCHAR, 7 | retweets INT, 8 | likes INT, 9 | replies INT 10 | ); 11 | CREATE TABLE five_ten 12 | ( 13 | id VARCHAR, 14 | thread_number VARCHAR, 15 | timestamp VARCHAR, 16 | text VARCHAR, 17 | retweets INT, 18 | likes INT, 19 | replies INT 20 | ); 21 | CREATE TABLE ten_fifteen 22 | ( 23 | id VARCHAR, 24 | thread_number VARCHAR, 25 | timestamp VARCHAR, 26 | text VARCHAR, 27 | retweets INT, 28 | likes INT, 29 | replies INT 30 | ); 31 | CREATE TABLE twenty_twentyfive 32 | ( 33 | id VARCHAR, 34 | thread_number VARCHAR, 35 | timestamp VARCHAR, 36 | text VARCHAR, 37 | retweets INT, 38 | likes INT, 39 | replies INT 40 | ); 41 | CREATE TABLE twentyfive_thirty 42 | ( 43 | id VARCHAR, 44 | thread_number VARCHAR, 45 | timestamp VARCHAR, 46 | text VARCHAR, 47 | retweets INT, 48 | likes INT, 49 | replies INT 50 | ); 51 | -------------------------------------------------------------------------------- /meteorology/historicalweatherdataforindiancities/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/meteorology/historicalweatherdataforindiancities/Bangalore_1990_2022_BangaloreCity.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/meteorology/historicalweatherdataforindiancities/Chennai_1990_2022_Madras.csv 3 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/meteorology/historicalweatherdataforindiancities/Delhi_NCR_1990_2022_Safdarjung.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/meteorology/historicalweatherdataforindiancities/Lucknow_1990_2022.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/meteorology/historicalweatherdataforindiancities/Mumbai_1990_2022_Santacruz.csv 6 | https://dbgroup.cs.tsinghua.edu.cn/datasets/meteorology/historicalweatherdataforindiancities/Rajasthan_1990_2022_Jodhpur.csv 7 | https://dbgroup.cs.tsinghua.edu.cn/datasets/meteorology/historicalweatherdataforindiancities/Station_GeoLocation_Longitute_Latitude_Elevation_EPSG_4326.csv 8 | -------------------------------------------------------------------------------- /meteorology/historicalweatherdataforindiancities/queries/historicalweatherdataforindiancities_query.sql: -------------------------------------------------------------------------------- /* historicalweatherdataforindiancities: Bangalore rows enriched with same-day weather from the other cities. out0 projects Bangalore columns; out1 last-joins Delhi/Chennai/Lucknow/Mumbai on time (OpenMLDB last join keeps a single matching row per left row); out0 and out1 are stitched by last join on the time key. NOTE(review): the *_multiply_* aliases below just re-select tavg/tmax with no arithmetic -- confirm whether a multiplication step was dropped by the generator. */ 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `time` as time_1, 7 | `time` as Bangalore_1990_2022_BangaloreCity_time_original_0, 8 | `prcp` as Bangalore_1990_2022_BangaloreCity_prcp_original_18, 9 | `tavg` as Bangalore_1990_2022_BangaloreCity_tavg_original_19, 10 | `tmax` as Bangalore_1990_2022_BangaloreCity_tmax_original_20, 11 | `tmin` as Bangalore_1990_2022_BangaloreCity_tmin_original_21, 12 | `tmax` as Bangalore_1990_2022_BangaloreCity_tmax_multiply_26, 13 | `tavg` as Bangalore_1990_2022_BangaloreCity_tavg_multiply_27, 14 | `tmax` as Bangalore_1990_2022_BangaloreCity_tmax_multiply_29, 15 | `tavg` as Bangalore_1990_2022_BangaloreCity_tavg_multiply_32, 16 | `tavg` as Bangalore_1990_2022_BangaloreCity_tavg_multiply_34, 17 | `tavg` as Bangalore_1990_2022_BangaloreCity_tavg_multiply_37, 18 | `tmax` as Bangalore_1990_2022_BangaloreCity_tmax_multiply_38, 19 | `tmax` as Bangalore_1990_2022_BangaloreCity_tmax_multiply_39 20 | from 21 | `Bangalore_1990_2022_BangaloreCity` 22 | ) 23 | as out0 24 | last 
join 25 | ( 26 | select 27 | `Bangalore_1990_2022_BangaloreCity`.`time` as time_2, 28 | `Delhi_NCR_1990_2022_Safdarjung_time`.`tavg` as Delhi_NCR_1990_2022_Safdarjung_tavg_multi_direct_2, 29 | `Chennai_1990_2022_Madras_time`.`prcp` as Chennai_1990_2022_Madras_prcp_multi_direct_3, 30 | `Chennai_1990_2022_Madras_time`.`tavg` as Chennai_1990_2022_Madras_tavg_multi_direct_4, 31 | `Chennai_1990_2022_Madras_time`.`tmax` as Chennai_1990_2022_Madras_tmax_multi_direct_5, 32 | `Chennai_1990_2022_Madras_time`.`tmin` as Chennai_1990_2022_Madras_tmin_multi_direct_6, 33 | `Delhi_NCR_1990_2022_Safdarjung_time`.`prcp` as Delhi_NCR_1990_2022_Safdarjung_prcp_multi_direct_7, 34 | `Delhi_NCR_1990_2022_Safdarjung_time`.`tmax` as Delhi_NCR_1990_2022_Safdarjung_tmax_multi_direct_8, 35 | `Delhi_NCR_1990_2022_Safdarjung_time`.`tmin` as Delhi_NCR_1990_2022_Safdarjung_tmin_multi_direct_9, 36 | `Lucknow_1990_2022_time`.`prcp` as Lucknow_1990_2022_prcp_multi_direct_10, 37 | `Lucknow_1990_2022_time`.`tavg` as Lucknow_1990_2022_tavg_multi_direct_11, 38 | `Lucknow_1990_2022_time`.`tmax` as Lucknow_1990_2022_tmax_multi_direct_12, 39 | `Lucknow_1990_2022_time`.`tmin` as Lucknow_1990_2022_tmin_multi_direct_13, 40 | `Mumbai_1990_2022_Santacruz_time`.`prcp` as Mumbai_1990_2022_Santacruz_prcp_multi_direct_14, 41 | `Mumbai_1990_2022_Santacruz_time`.`tavg` as Mumbai_1990_2022_Santacruz_tavg_multi_direct_15, 42 | `Mumbai_1990_2022_Santacruz_time`.`tmax` as Mumbai_1990_2022_Santacruz_tmax_multi_direct_16, 43 | `Mumbai_1990_2022_Santacruz_time`.`tmin` as Mumbai_1990_2022_Santacruz_tmin_multi_direct_17 44 | from 45 | `Bangalore_1990_2022_BangaloreCity` 46 | last join `Delhi_NCR_1990_2022_Safdarjung` as `Delhi_NCR_1990_2022_Safdarjung_time` on `Bangalore_1990_2022_BangaloreCity`.`time` = `Delhi_NCR_1990_2022_Safdarjung_time`.`time` 47 | last join `Chennai_1990_2022_Madras` as `Chennai_1990_2022_Madras_time` on `Bangalore_1990_2022_BangaloreCity`.`time` = `Chennai_1990_2022_Madras_time`.`time` 48 | last join 
`Lucknow_1990_2022` as `Lucknow_1990_2022_time` on `Bangalore_1990_2022_BangaloreCity`.`time` = `Lucknow_1990_2022_time`.`time` 49 | last join `Mumbai_1990_2022_Santacruz` as `Mumbai_1990_2022_Santacruz_time` on `Bangalore_1990_2022_BangaloreCity`.`time` = `Mumbai_1990_2022_Santacruz_time`.`time`) 50 | as out1 51 | on out0.time_1 = out1.time_2 52 | ; -------------------------------------------------------------------------------- /meteorology/historicalweatherdataforindiancities/tables/historicalweatherdataforindiancities_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Bangalore_1990_2022_BangaloreCity 2 | ( 3 | time VARCHAR, 4 | tavg DOUBLE PRECISION, 5 | tmin DOUBLE PRECISION, 6 | tmax DOUBLE PRECISION, 7 | prcp DOUBLE PRECISION 8 | ); 9 | CREATE TABLE Chennai_1990_2022_Madras 10 | ( 11 | time VARCHAR, 12 | tavg DOUBLE PRECISION, 13 | tmin DOUBLE PRECISION, 14 | tmax DOUBLE PRECISION, 15 | prcp DOUBLE PRECISION 16 | ); 17 | CREATE TABLE Delhi_NCR_1990_2022_Safdarjung 18 | ( 19 | time VARCHAR, 20 | tavg DOUBLE PRECISION, 21 | tmin DOUBLE PRECISION, 22 | tmax DOUBLE PRECISION, 23 | prcp DOUBLE PRECISION 24 | ); 25 | CREATE TABLE Lucknow_1990_2022 26 | ( 27 | time VARCHAR, 28 | tavg DOUBLE PRECISION, 29 | tmin DOUBLE PRECISION, 30 | tmax DOUBLE PRECISION, 31 | prcp DOUBLE PRECISION 32 | ); 33 | CREATE TABLE Mumbai_1990_2022_Santacruz 34 | ( 35 | time VARCHAR, 36 | tavg DOUBLE PRECISION, 37 | tmin DOUBLE PRECISION, 38 | tmax DOUBLE PRECISION, 39 | prcp DOUBLE PRECISION 40 | ); 41 | CREATE TABLE Rajasthan_1990_2022_Jodhpur 42 | ( 43 | time VARCHAR, 44 | tavg DOUBLE PRECISION, 45 | tmin DOUBLE PRECISION, 46 | tmax DOUBLE PRECISION, 47 | prcp DOUBLE PRECISION 48 | ); 49 | CREATE TABLE Station_GeoLocation_Longitute_Latitude_Elevation_EPSG_4326 50 | ( 51 | longitude DOUBLE PRECISION, 52 | Latitude DOUBLE PRECISION, 53 | Elevation INT, 54 | Location_Name VARCHAR 55 | ); 56 | 
-------------------------------------------------------------------------------- /others/DontGetKicked/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/DontGetKicked/training.csv 2 | -------------------------------------------------------------------------------- /others/DontGetKicked/queries/DontGetKicked_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `RefId` as RefId_1, 6 | `PurchDate` as training_PurchDate_original_0, 7 | `RefId` as training_RefId_original_1, 8 | `IsBadBuy` as training_IsBadBuy_original_2, 9 | `AUCGUART` as training_AUCGUART_original_3, 10 | `Auction` as training_Auction_original_4, 11 | `BYRNO` as training_BYRNO_original_5, 12 | `Color` as training_Color_original_6, 13 | `IsOnlineSale` as training_IsOnlineSale_original_7, 14 | `MMRAcquisitionAuctionAveragePrice` as training_MMRAcquisitionAuctionAveragePrice_original_8, 15 | `MMRAcquisitionAuctionCleanPrice` as training_MMRAcquisitionAuctionCleanPrice_original_9, 16 | `MMRAcquisitionRetailAveragePrice` as training_MMRAcquisitionRetailAveragePrice_original_10, 17 | `MMRAcquisitonRetailCleanPrice` as training_MMRAcquisitonRetailCleanPrice_original_11, 18 | `MMRCurrentAuctionAveragePrice` as training_MMRCurrentAuctionAveragePrice_original_12, 19 | `MMRCurrentAuctionCleanPrice` as training_MMRCurrentAuctionCleanPrice_original_13, 20 | `MMRCurrentRetailAveragePrice` as training_MMRCurrentRetailAveragePrice_original_14, 21 | `MMRCurrentRetailCleanPrice` as training_MMRCurrentRetailCleanPrice_original_15, 22 | `Make` as training_Make_original_16, 23 | `Model` as training_Model_original_17, 24 | `Nationality` as training_Nationality_original_18, 25 | `PRIMEUNIT` as training_PRIMEUNIT_original_19, 26 | `Size` as training_Size_original_20, 27 | `SubModel` as 
training_SubModel_original_21, 28 | `TopThreeAmericanName` as training_TopThreeAmericanName_original_22, 29 | `Transmission` as training_Transmission_original_23, 30 | `Trim` as training_Trim_original_24, 31 | `VNST` as training_VNST_original_25, 32 | `VNZIP1` as training_VNZIP1_original_26, 33 | `VehBCost` as training_VehBCost_original_27, 34 | `VehOdo` as training_VehOdo_original_28, 35 | `VehYear` as training_VehYear_original_29, 36 | `VehicleAge` as training_VehicleAge_original_30, 37 | `WarrantyCost` as training_WarrantyCost_original_31, 38 | `WheelType` as training_WheelType_original_32, 39 | `WheelTypeID` as training_WheelTypeID_original_33, 40 | `WheelTypeID` as training_WheelTypeID_combine_34, 41 | `Auction` as training_Auction_combine_34, 42 | `WheelType` as training_WheelType_combine_35, 43 | `Auction` as training_Auction_combine_35, 44 | `WheelTypeID` as training_WheelTypeID_combine_36, 45 | `PRIMEUNIT` as training_PRIMEUNIT_combine_36, 46 | `WheelType` as training_WheelType_combine_37, 47 | `PRIMEUNIT` as training_PRIMEUNIT_combine_37, 48 | `WheelTypeID` as training_WheelTypeID_combine_38, 49 | `AUCGUART` as training_AUCGUART_combine_38, 50 | `WheelType` as training_WheelType_combine_39, 51 | `AUCGUART` as training_AUCGUART_combine_39, 52 | `VehicleAge` as training_VehicleAge_combine_40, 53 | `AUCGUART` as training_AUCGUART_combine_40, 54 | `VehicleAge` as training_VehicleAge_combine_41, 55 | `PRIMEUNIT` as training_PRIMEUNIT_combine_41, 56 | `PRIMEUNIT` as training_PRIMEUNIT_combine_42, 57 | `Nationality` as training_Nationality_combine_42, 58 | `Model` as training_Model_combine_43, 59 | `VNST` as training_VNST_combine_43, 60 | `Model` as training_Model_combine_44, 61 | `Auction` as training_Auction_combine_44, 62 | `Model` as training_Model_combine_45, 63 | `Color` as training_Color_combine_45, 64 | `WarrantyCost` as training_WarrantyCost_combine_46, 65 | `Model` as training_Model_combine_46, 66 | `VNZIP1` as training_VNZIP1_combine_47, 67 | `BYRNO` 
as training_BYRNO_combine_47, 68 | `WarrantyCost` as training_WarrantyCost_combine_48, 69 | `SubModel` as training_SubModel_combine_48, 70 | `VNZIP1` as training_VNZIP1_combine_49, 71 | `AUCGUART` as training_AUCGUART_combine_49, 72 | `WarrantyCost` as training_WarrantyCost_combine_50, 73 | `Make` as training_Make_combine_50, 74 | /* rolling max of VehBCost over the 7-day window declared below, partitioned by MMRCurrentAuctionCleanPrice */ max(`VehBCost`) over training_MMRCurrentAuctionCleanPrice_PurchDate_0s_7d_200 as training_VehBCost_window_max_51 75 | from 76 | `training` 77 | window training_MMRCurrentAuctionCleanPrice_PurchDate_0s_7d_200 as (partition by `MMRCurrentAuctionCleanPrice` order by `PurchDate` rows_range between 7d open preceding and 0s preceding MAXSIZE 200); -------------------------------------------------------------------------------- /others/DontGetKicked/tables/DontGetKicked_schema.sql: -------------------------------------------------------------------------------- 1 | /* DontGetKicked used-car auction data, loaded from training.csv (see data_link.txt). */ CREATE TABLE training (RefId INT, IsBadBuy INT, PurchDate TIMESTAMP, Auction VARCHAR, VehYear INT, VehicleAge INT, Make VARCHAR, Model VARCHAR, Trim VARCHAR, SubModel VARCHAR, Color VARCHAR, Transmission VARCHAR, WheelTypeID VARCHAR, WheelType VARCHAR, VehOdo INT, Nationality VARCHAR, Size VARCHAR, TopThreeAmericanName VARCHAR, /* NOTE(review): the eight MMR* market-price columns are VARCHAR rather than numeric -- presumably the CSV holds non-numeric placeholders; cast before arithmetic. The spelling MMRAcquisitonRetailCleanPrice (missing "i") matches the query file's usage, so do not "fix" it. */ MMRAcquisitionAuctionAveragePrice VARCHAR, MMRAcquisitionAuctionCleanPrice VARCHAR, MMRAcquisitionRetailAveragePrice VARCHAR, MMRAcquisitonRetailCleanPrice VARCHAR, MMRCurrentAuctionAveragePrice VARCHAR, MMRCurrentAuctionCleanPrice VARCHAR, MMRCurrentRetailAveragePrice VARCHAR, MMRCurrentRetailCleanPrice VARCHAR, PRIMEUNIT VARCHAR, AUCGUART VARCHAR, BYRNO INT, VNZIP1 INT, VNST VARCHAR, VehBCost DOUBLE PRECISION, IsOnlineSale INT, WarrantyCost INT); 2 | -------------------------------------------------------------------------------- /others/Hybrid_Indoor_Positioning/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/Hybrid_Indoor_Positioning/dataset.csv 2 | 
-------------------------------------------------------------------------------- /others/Hybrid_Indoor_Positioning/tables/Hybrid_Indoor_Positioning_schema.sql: -------------------------------------------------------------------------------- 1 | /* Indoor-positioning measurements loaded from dataset.csv: measurement id/time, 3-D position, zone labels, GPS fields, named signal columns, and 22 unnamed numeric columns. */ CREATE TABLE dataset 2 | ( 3 | measId VARCHAR, 4 | measTimestamp TIMESTAMP, 5 | Position_X DOUBLE PRECISION, 6 | _Position_Y DOUBLE PRECISION, /* NOTE(review): leading underscore breaks the Position_X/Position_Z pattern -- presumably a mangled CSV header; keep as-is to match the data */ 7 | Position_Z DOUBLE PRECISION, 8 | zoneId VARCHAR, 9 | Zonename VARCHAR, 10 | meas_X VARCHAR, 11 | meas_Y VARCHAR, 12 | meas_Z VARCHAR, 13 | gpsLatitude VARCHAR, 14 | gpsLongitude VARCHAR, 15 | gpsAltitude VARCHAR, /* columns from N through wireless are presumably per-network signal readings (names look like SSIDs) -- confirm against dataset.csv */ 16 | N VARCHAR, 17 | O VARCHAR, 18 | AIT_L15 VARCHAR, 19 | aut_sams_1 VARCHAR, 20 | bolyai_E4_floor3 VARCHAR, 21 | Bosch_Telemetry VARCHAR, 22 | dd VARCHAR, 23 | doa2 VARCHAR, 24 | doa200 VARCHAR, 25 | doa203 VARCHAR, 26 | doa207 VARCHAR, 27 | doa208 VARCHAR, 28 | doa6 VARCHAR, 29 | EET_3 VARCHAR, 30 | FRM VARCHAR, 31 | GEIAKFSZ VARCHAR, 32 | IITAP1 VARCHAR, 33 | IITAP1_GUEST VARCHAR, 34 | IITAP2 VARCHAR, 35 | IITAP2_GUEST VARCHAR, 36 | IITAP3 VARCHAR, 37 | IITAP3_GUEST VARCHAR, 38 | info VARCHAR, 39 | info2 VARCHAR, 40 | KEMA10 VARCHAR, 41 | kemA4 VARCHAR, 42 | KRZ VARCHAR, 43 | library114 VARCHAR, 44 | TP_LINK_B2765A VARCHAR, 45 | UPC_Wi_Free VARCHAR, 46 | UPC8902044 VARCHAR, 47 | wireless VARCHAR, /* COL1..COL22: unnamed trailing numeric columns from the CSV */ 48 | COL1 DOUBLE PRECISION, 49 | COL2 DOUBLE PRECISION, 50 | COL3 DOUBLE PRECISION, 51 | COL4 DOUBLE PRECISION, 52 | COL5 DOUBLE PRECISION, 53 | COL6 DOUBLE PRECISION, 54 | COL7 DOUBLE PRECISION, 55 | COL8 DOUBLE PRECISION, 56 | COL9 DOUBLE PRECISION, 57 | COL10 DOUBLE PRECISION, 58 | COL11 DOUBLE PRECISION, 59 | COL12 DOUBLE PRECISION, 60 | COL13 DOUBLE PRECISION, 61 | COL14 DOUBLE PRECISION, 62 | COL15 DOUBLE PRECISION, 63 | COL16 DOUBLE PRECISION, 64 | COL17 DOUBLE PRECISION, 65 | COL18 DOUBLE PRECISION, 66 | COL19 DOUBLE PRECISION, 67 | COL20 DOUBLE PRECISION, 68 | COL21 DOUBLE PRECISION, 69 | COL22 DOUBLE PRECISION 70 | ); 71 | 
-------------------------------------------------------------------------------- /others/RSSI_dataset/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/RSSI_dataset/filtered_rssi.csv 2 | -------------------------------------------------------------------------------- /others/RSSI_dataset/tables/RSSI_dataset_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE filtered_rssi (id INT, name VARCHAR, locationStatus VARCHAR, timestamp BIGINT, rssiOne INT, rssiTwo INT); 2 | -------------------------------------------------------------------------------- /others/airbnb-recruiting-new-user-bookings/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/airbnb-recruiting-new-user-bookings/age_gender_bkts.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/airbnb-recruiting-new-user-bookings/countries.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/airbnb-recruiting-new-user-bookings/sessions.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/airbnb-recruiting-new-user-bookings/train_users_2.csv 5 | -------------------------------------------------------------------------------- /others/airbnb-recruiting-new-user-bookings/queries/airbnb-recruiting-new-user-bookings_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `id` as id_1, 7 | `timestamp_first_active` as train_users_2_timestamp_first_active_original_0, 8 | `id` as train_users_2_id_original_1, 9 | `affiliate_channel` as train_users_2_affiliate_channel_original_7, 10 | `affiliate_provider` as train_users_2_affiliate_provider_original_8, 11 | `age` as train_users_2_age_original_9, 12 | `country_destination` as 
train_users_2_country_destination_original_10, 13 | `date_account_created` as train_users_2_date_account_created_original_11, 14 | `date_first_booking` as train_users_2_date_first_booking_original_12, 15 | `first_affiliate_tracked` as train_users_2_first_affiliate_tracked_original_13, 16 | `first_browser` as train_users_2_first_browser_original_14, 17 | `first_device_type` as train_users_2_first_device_type_original_15, 18 | `gender` as train_users_2_gender_original_16, 19 | `language` as train_users_2_language_original_17, 20 | `signup_app` as train_users_2_signup_app_original_18, 21 | `signup_flow` as train_users_2_signup_flow_original_19, 22 | `signup_method` as train_users_2_signup_method_original_20, 23 | `country_destination` as train_users_2_country_destination_combine_21, 24 | `gender` as train_users_2_gender_combine_23, 25 | `country_destination` as train_users_2_country_destination_combine_24, 26 | `gender` as train_users_2_gender_combine_24, 27 | `country_destination` as train_users_2_country_destination_combine_25, 28 | `affiliate_provider` as train_users_2_affiliate_provider_combine_25, 29 | `country_destination` as train_users_2_country_destination_combine_26, 30 | `affiliate_provider` as train_users_2_affiliate_provider_combine_26, 31 | `signup_app` as train_users_2_signup_app_combine_27, 32 | `first_device_type` as train_users_2_first_device_type_combine_27, 33 | `first_affiliate_tracked` as train_users_2_first_affiliate_tracked_combine_27, 34 | `signup_flow` as train_users_2_signup_flow_combine_28, 35 | `first_device_type` as train_users_2_first_device_type_combine_28, 36 | `first_affiliate_tracked` as train_users_2_first_affiliate_tracked_combine_28, 37 | `signup_app` as train_users_2_signup_app_combine_29, 38 | `signup_method` as train_users_2_signup_method_combine_29, 39 | `first_affiliate_tracked` as train_users_2_first_affiliate_tracked_combine_29, 40 | `date_account_created` as train_users_2_date_account_created_combine_30, 41 | 
`date_first_booking` as train_users_2_date_first_booking_combine_30, 42 | `first_affiliate_tracked` as train_users_2_first_affiliate_tracked_combine_30, 43 | `date_account_created` as train_users_2_date_account_created_combine_31, 44 | `date_first_booking` as train_users_2_date_first_booking_combine_31, 45 | `first_browser` as train_users_2_first_browser_combine_31, 46 | `date_account_created` as train_users_2_date_account_created_combine_32, 47 | `date_first_booking` as train_users_2_date_first_booking_combine_32, 48 | `signup_method` as train_users_2_signup_method_combine_32, 49 | `date_account_created` as train_users_2_date_account_created_combine_33, 50 | `gender` as train_users_2_gender_combine_33, 51 | `date_first_booking` as train_users_2_date_first_booking_combine_33, 52 | min(`age`) over train_users_2_country_destination_timestamp_first_active_0s_2d_100 as train_users_2_age_window_min_34, 53 | max(`age`) over train_users_2_country_destination_timestamp_first_active_0s_2d_100 as train_users_2_age_window_max_35, 54 | sum(`age`) over train_users_2_country_destination_timestamp_first_active_0s_2d_200 as train_users_2_age_window_sum_36, 55 | avg(`age`) over train_users_2_country_destination_timestamp_first_active_0s_2d_200 as train_users_2_age_window_avg_37, 56 | avg(`age`) over train_users_2_country_destination_timestamp_first_active_0s_10h_200 as train_users_2_age_window_avg_38 57 | from 58 | `train_users_2` 59 | window train_users_2_country_destination_timestamp_first_active_0s_2d_100 as (partition by `country_destination` order by `timestamp_first_active` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 60 | train_users_2_country_destination_timestamp_first_active_0s_2d_200 as (partition by `country_destination` order by `timestamp_first_active` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 61 | train_users_2_country_destination_timestamp_first_active_0s_10h_200 as (partition by `country_destination` order by 
`timestamp_first_active` rows_range between 10h open preceding and 0s preceding MAXSIZE 200)) 62 | as out0 63 | last join 64 | ( 65 | select 66 | `train_users_2`.`id` as id_3, 67 | `countries_country_destination`.`distance_km` as countries_distance_km_multi_direct_2, 68 | `age_gender_bkts_country_destination`.`age_bucket` as age_gender_bkts_age_bucket_multi_direct_3, 69 | `age_gender_bkts_country_destination`.`gender` as age_gender_bkts_gender_multi_direct_4, 70 | `age_gender_bkts_country_destination`.`year` as age_gender_bkts_year_multi_direct_5, 71 | `countries_country_destination`.`destination_language` as countries_destination_language_multi_direct_6 72 | from 73 | `train_users_2` 74 | last join `countries` as `countries_country_destination` on `train_users_2`.`country_destination` = `countries_country_destination`.`country_destination` 75 | last join `age_gender_bkts` as `age_gender_bkts_country_destination` on `train_users_2`.`country_destination` = `age_gender_bkts_country_destination`.`country_destination`) 76 | as out1 77 | on out0.id_1 = out1.id_3 78 | ; -------------------------------------------------------------------------------- /others/airbnb-recruiting-new-user-bookings/tables/airbnb-recruiting-new-user-bookings_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE age_gender_bkts (age_bucket VARCHAR, country_destination VARCHAR, gender VARCHAR, population_in_thousands DOUBLE PRECISION, year DOUBLE PRECISION); 2 | CREATE TABLE countries (country_destination VARCHAR, lat_destination DOUBLE PRECISION, lng_destination DOUBLE PRECISION, distance_km DOUBLE PRECISION, destination_km2 DOUBLE PRECISION, destination_language VARCHAR, language_levenshtein_distance DOUBLE PRECISION); 3 | CREATE TABLE sessions (user_id VARCHAR, action VARCHAR, action_type VARCHAR, action_detail VARCHAR, device_type VARCHAR, secs_elapsed DOUBLE PRECISION); 4 | CREATE TABLE train_users_2 (id VARCHAR, date_account_created 
VARCHAR, timestamp_first_active TIMESTAMP, date_first_booking VARCHAR, gender VARCHAR, age DOUBLE PRECISION, signup_method VARCHAR, signup_flow INT, language VARCHAR, affiliate_channel VARCHAR, affiliate_provider VARCHAR, first_affiliate_tracked VARCHAR, signup_app VARCHAR, first_device_type VARCHAR, first_browser VARCHAR, country_destination VARCHAR); 5 | -------------------------------------------------------------------------------- /others/bike-sharing-demand/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/bike-sharing-demand/train.csv 2 | -------------------------------------------------------------------------------- /others/bike-sharing-demand/queries/bike-sharing-demand_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `datetime` as datetime_1, 6 | `datetime` as train_datetime_original_0, 7 | `count` as train_count_original_2, 8 | `atemp` as train_atemp_original_3, 9 | `casual` as train_casual_original_4, 10 | `holiday` as train_holiday_original_5, 11 | `humidity` as train_humidity_original_6, 12 | `registered` as train_registered_original_7, 13 | `season` as train_season_original_8, 14 | `temp` as train_temp_original_9, 15 | `weather` as train_weather_original_10, 16 | `windspeed` as train_windspeed_original_11, 17 | `workingday` as train_workingday_original_12, 18 | hour(timestamp(`datetime`)) as train_datetime_hourofday_13, 19 | case when !isnull(at(`workingday`, 0)) over train_holiday_datetime_0s_5d_200 then count_where(`workingday`, `workingday` = at(`workingday`, 0)) over train_holiday_datetime_0s_5d_200 else null end as train_workingday_window_count_14, 20 | case when !isnull(at(`workingday`, 0)) over train_holiday_datetime_0s_7d_100 then count_where(`workingday`, `workingday` = at(`workingday`, 0)) over 
train_holiday_datetime_0s_7d_100 else null end as train_workingday_window_count_15, 21 | case when !isnull(at(`workingday`, 0)) over train_season_datetime_0s_5d_200 then count_where(`workingday`, `workingday` = at(`workingday`, 0)) over train_season_datetime_0s_5d_200 else null end as train_workingday_window_count_16, 22 | case when !isnull(at(`workingday`, 0)) over train_season_datetime_0s_7d_100 then count_where(`workingday`, `workingday` = at(`workingday`, 0)) over train_season_datetime_0s_7d_100 else null end as train_workingday_window_count_17, 23 | avg(`windspeed`) over train_humidity_datetime_0s_32d_100 as train_windspeed_window_avg_18, 24 | avg(`atemp`) over train_humidity_datetime_0s_32d_100 as train_atemp_window_avg_19, 25 | case when !isnull(at(`casual`, 0)) over train_registered_datetime_0s_32d_200 then count_where(`casual`, `casual` = at(`casual`, 0)) over train_registered_datetime_0s_32d_200 else null end as train_casual_window_count_20, 26 | min(`temp`) over train_humidity_datetime_0s_10h_200 as train_temp_window_min_21, 27 | avg(`windspeed`) over train_registered_datetime_0s_32d_100 as train_windspeed_window_avg_22, 28 | avg(`windspeed`) over train_casual_datetime_0s_32d_200 as train_windspeed_window_avg_23, 29 | avg(`atemp`) over train_casual_datetime_0s_32d_200 as train_atemp_window_avg_24, 30 | avg(`temp`) over train_casual_datetime_0s_32d_200 as train_temp_window_avg_25, 31 | max(`windspeed`) over train_humidity_datetime_0s_32d_200 as train_windspeed_window_max_26, 32 | max(`atemp`) over train_humidity_datetime_0s_32d_200 as train_atemp_window_max_27, 33 | avg(`temp`) over train_humidity_datetime_0s_32d_100 as train_temp_window_avg_28, 34 | avg(`atemp`) over train_weather_datetime_0s_32d_100 as train_atemp_window_avg_29, 35 | case when !isnull(at(`season`, 0)) over train_holiday_datetime_0s_7d_100 then count_where(`season`, `season` = at(`season`, 0)) over train_holiday_datetime_0s_7d_100 else null end as train_season_window_count_30 36 | from 
37 | `train` 38 | window train_holiday_datetime_0s_5d_200 as (partition by `holiday` order by `datetime` rows_range between 5d open preceding and 0s preceding MAXSIZE 200), 39 | train_holiday_datetime_0s_7d_100 as (partition by `holiday` order by `datetime` rows_range between 7d open preceding and 0s preceding MAXSIZE 100), 40 | train_season_datetime_0s_5d_200 as (partition by `season` order by `datetime` rows_range between 5d open preceding and 0s preceding MAXSIZE 200), 41 | train_season_datetime_0s_7d_100 as (partition by `season` order by `datetime` rows_range between 7d open preceding and 0s preceding MAXSIZE 100), 42 | train_humidity_datetime_0s_32d_100 as (partition by `humidity` order by `datetime` rows_range between 32d open preceding and 0s preceding MAXSIZE 100), 43 | train_registered_datetime_0s_32d_200 as (partition by `registered` order by `datetime` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 44 | train_humidity_datetime_0s_10h_200 as (partition by `humidity` order by `datetime` rows_range between 10h open preceding and 0s preceding MAXSIZE 200), 45 | train_registered_datetime_0s_32d_100 as (partition by `registered` order by `datetime` rows_range between 32d open preceding and 0s preceding MAXSIZE 100), 46 | train_casual_datetime_0s_32d_200 as (partition by `casual` order by `datetime` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 47 | train_humidity_datetime_0s_32d_200 as (partition by `humidity` order by `datetime` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 48 | train_weather_datetime_0s_32d_100 as (partition by `weather` order by `datetime` rows_range between 32d open preceding and 0s preceding MAXSIZE 100); -------------------------------------------------------------------------------- /others/bike-sharing-demand/tables/bike-sharing-demand_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE train (datetime 
TIMESTAMP, season INT, holiday INT, workingday INT, weather INT, temp DOUBLE PRECISION, atemp DOUBLE PRECISION, humidity INT, windspeed DOUBLE PRECISION, casual INT, registered INT, count INT); 2 | -------------------------------------------------------------------------------- /others/cyclistic-bike-share-user-dataset-1-year/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/cyclistic-bike-share-user-dataset-1-year/df_1_year.csv 2 | -------------------------------------------------------------------------------- /others/cyclistic-bike-share-user-dataset-1-year/queries/cyclistic-bike-share-user-dataset-1-year_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `id` as id_1, 6 | `started_at` as df_1_year_started_at_original_0, 7 | `id` as df_1_year_id_original_1, 8 | `ride_length` as df_1_year_ride_length_original_2, 9 | `Month` as df_1_year_Month_original_3, 10 | `end_lat` as df_1_year_end_lat_original_4, 11 | `end_lng` as df_1_year_end_lng_original_5, 12 | `end_station_id` as df_1_year_end_station_id_original_6, 13 | `end_station_name` as df_1_year_end_station_name_original_7, 14 | `ended_at` as df_1_year_ended_at_original_8, 15 | `member_casual` as df_1_year_member_casual_original_9, 16 | `rideable_type` as df_1_year_rideable_type_original_10, 17 | `start_lat` as df_1_year_start_lat_original_11, 18 | `start_lng` as df_1_year_start_lng_original_12, 19 | `start_station_id` as df_1_year_start_station_id_original_13, 20 | `start_station_name` as df_1_year_start_station_name_original_14, 21 | `start_lat` as df_1_year_start_lat_divide_15, 22 | `end_lat` as df_1_year_end_lat_divide_15, 23 | `end_lat` as df_1_year_end_lat_divide_16, 24 | `start_lat` as df_1_year_start_lat_divide_16, 25 | `end_station_id` as df_1_year_end_station_id_combine_17, 26 | 
`start_station_id` as df_1_year_start_station_id_combine_17, 27 | `end_station_name` as df_1_year_end_station_name_combine_18, 28 | `start_station_id` as df_1_year_start_station_id_combine_18, 29 | `end_station_name` as df_1_year_end_station_name_combine_19, 30 | `end_station_id` as df_1_year_end_station_id_combine_19, 31 | `start_station_id` as df_1_year_start_station_id_combine_19, 32 | `end_station_id` as df_1_year_end_station_id_combine_20, 33 | `start_station_name` as df_1_year_start_station_name_combine_20, 34 | `end_station_id` as df_1_year_end_station_id_combine_21, 35 | `start_station_id` as df_1_year_start_station_id_combine_21, 36 | `start_station_name` as df_1_year_start_station_name_combine_21, 37 | `end_station_name` as df_1_year_end_station_name_combine_22, 38 | `start_station_name` as df_1_year_start_station_name_combine_22, 39 | `end_station_name` as df_1_year_end_station_name_combine_23, 40 | `start_station_name` as df_1_year_start_station_name_combine_23, 41 | `rideable_type` as df_1_year_rideable_type_combine_23, 42 | `end_lng` as df_1_year_end_lng_divide_24, 43 | `start_lng` as df_1_year_start_lng_divide_24, 44 | `start_lng` as df_1_year_start_lng_divide_25, 45 | `end_lng` as df_1_year_end_lng_divide_25, 46 | `end_lng` as df_1_year_end_lng_multiply_26, 47 | `start_lng` as df_1_year_start_lng_multiply_26, 48 | hour(timestamp(`ended_at`)) as df_1_year_ended_at_hourofday_27, 49 | hour(timestamp(`started_at`)) as df_1_year_started_at_hourofday_28, 50 | avg(`end_lat`) over df_1_year_end_station_name_started_at_0s_32d_200 as df_1_year_end_lat_window_avg_29, 51 | sum(`end_lat`) over df_1_year_member_casual_started_at_0s_32d_200 as df_1_year_end_lat_window_sum_30, 52 | sum(`start_lat`) over df_1_year_start_station_id_started_at_0s_32d_200 as df_1_year_start_lat_window_sum_31, 53 | sum(`end_lng`) over df_1_year_start_station_name_started_at_0s_32d_200 as df_1_year_end_lng_window_sum_32 54 | from 55 | `df_1_year` 56 | window 
df_1_year_end_station_name_started_at_0s_32d_200 as (partition by `end_station_name` order by `started_at` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 57 | df_1_year_member_casual_started_at_0s_32d_200 as (partition by `member_casual` order by `started_at` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 58 | df_1_year_start_station_id_started_at_0s_32d_200 as (partition by `start_station_id` order by `started_at` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 59 | df_1_year_start_station_name_started_at_0s_32d_200 as (partition by `start_station_name` order by `started_at` rows_range between 32d open preceding and 0s preceding MAXSIZE 200); -------------------------------------------------------------------------------- /others/cyclistic-bike-share-user-dataset-1-year/tables/cyclistic-bike-share-user-dataset-1-year_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE df_1_year (id INT, ride_id VARCHAR, rideable_type VARCHAR, started_at TIMESTAMP, ended_at TIMESTAMP, start_station_name VARCHAR, start_station_id VARCHAR, end_station_name VARCHAR, end_station_id VARCHAR, start_lat DOUBLE PRECISION, start_lng DOUBLE PRECISION, end_lat DOUBLE PRECISION, end_lng DOUBLE PRECISION, member_casual VARCHAR, Year INT, Month INT, ride_length DOUBLE PRECISION, Year_Month TIMESTAMP); 2 | -------------------------------------------------------------------------------- /others/data-science-job-salaries/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/data-science-job-salaries/ds_salaries.csv 2 | -------------------------------------------------------------------------------- /others/data-science-job-salaries/tables/data-science-job-salaries_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE 
ds_salaries (id INT, work_year INT, experience_level VARCHAR, employment_type VARCHAR, job_title VARCHAR, salary INT, salary_currency VARCHAR, salary_in_usd INT, employee_residence VARCHAR, remote_ratio INT, company_location VARCHAR, company_size VARCHAR); 2 | -------------------------------------------------------------------------------- /others/expedia-hotel-recommendations/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/expedia-hotel-recommendations/destinations.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/expedia-hotel-recommendations/train.csv 3 | -------------------------------------------------------------------------------- /others/expedia-hotel-recommendations/tables/expedia-hotel-recommendations_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE destinations (srch_destination_id INT, d1 DOUBLE PRECISION, d2 DOUBLE PRECISION, d3 DOUBLE PRECISION, d4 DOUBLE PRECISION, d5 DOUBLE PRECISION, d6 DOUBLE PRECISION, d7 DOUBLE PRECISION, d8 DOUBLE PRECISION, d9 DOUBLE PRECISION, d10 DOUBLE PRECISION, d11 DOUBLE PRECISION, d12 DOUBLE PRECISION, d13 DOUBLE PRECISION, d14 DOUBLE PRECISION, d15 DOUBLE PRECISION, d16 DOUBLE PRECISION, d17 DOUBLE PRECISION, d18 DOUBLE PRECISION, d19 DOUBLE PRECISION, d20 DOUBLE PRECISION, d21 DOUBLE PRECISION, d22 DOUBLE PRECISION, d23 DOUBLE PRECISION, d24 DOUBLE PRECISION, d25 DOUBLE PRECISION, d26 DOUBLE PRECISION, d27 DOUBLE PRECISION, d28 DOUBLE PRECISION, d29 DOUBLE PRECISION, d30 DOUBLE PRECISION, d31 DOUBLE PRECISION, d32 DOUBLE PRECISION, d33 DOUBLE PRECISION, d34 DOUBLE PRECISION, d35 DOUBLE PRECISION, d36 DOUBLE PRECISION, d37 DOUBLE PRECISION, d38 DOUBLE PRECISION, d39 DOUBLE PRECISION, d40 DOUBLE PRECISION, d41 DOUBLE PRECISION, d42 DOUBLE PRECISION, d43 DOUBLE PRECISION, d44 DOUBLE PRECISION, d45 DOUBLE PRECISION, d46 DOUBLE PRECISION, d47 DOUBLE 
PRECISION, d48 DOUBLE PRECISION, d49 DOUBLE PRECISION, d50 DOUBLE PRECISION, d51 DOUBLE PRECISION, d52 DOUBLE PRECISION, d53 DOUBLE PRECISION, d54 DOUBLE PRECISION, d55 DOUBLE PRECISION, d56 DOUBLE PRECISION, d57 DOUBLE PRECISION, d58 DOUBLE PRECISION, d59 DOUBLE PRECISION, d60 DOUBLE PRECISION, d61 DOUBLE PRECISION, d62 DOUBLE PRECISION, d63 DOUBLE PRECISION, d64 DOUBLE PRECISION, d65 DOUBLE PRECISION, d66 DOUBLE PRECISION, d67 DOUBLE PRECISION, d68 DOUBLE PRECISION, d69 DOUBLE PRECISION, d70 DOUBLE PRECISION, d71 DOUBLE PRECISION, d72 DOUBLE PRECISION, d73 DOUBLE PRECISION, d74 DOUBLE PRECISION, d75 DOUBLE PRECISION, d76 DOUBLE PRECISION, d77 DOUBLE PRECISION, d78 DOUBLE PRECISION, d79 DOUBLE PRECISION, d80 DOUBLE PRECISION, d81 DOUBLE PRECISION, d82 DOUBLE PRECISION, d83 DOUBLE PRECISION, d84 DOUBLE PRECISION, d85 DOUBLE PRECISION, d86 DOUBLE PRECISION, d87 DOUBLE PRECISION, d88 DOUBLE PRECISION, d89 DOUBLE PRECISION, d90 DOUBLE PRECISION, d91 DOUBLE PRECISION, d92 DOUBLE PRECISION, d93 DOUBLE PRECISION, d94 DOUBLE PRECISION, d95 DOUBLE PRECISION, d96 DOUBLE PRECISION, d97 DOUBLE PRECISION, d98 DOUBLE PRECISION, d99 DOUBLE PRECISION, d100 DOUBLE PRECISION, d101 DOUBLE PRECISION, d102 DOUBLE PRECISION, d103 DOUBLE PRECISION, d104 DOUBLE PRECISION, d105 DOUBLE PRECISION, d106 DOUBLE PRECISION, d107 DOUBLE PRECISION, d108 DOUBLE PRECISION, d109 DOUBLE PRECISION, d110 DOUBLE PRECISION, d111 DOUBLE PRECISION, d112 DOUBLE PRECISION, d113 DOUBLE PRECISION, d114 DOUBLE PRECISION, d115 DOUBLE PRECISION, d116 DOUBLE PRECISION, d117 DOUBLE PRECISION, d118 DOUBLE PRECISION, d119 DOUBLE PRECISION, d120 DOUBLE PRECISION, d121 DOUBLE PRECISION, d122 DOUBLE PRECISION, d123 DOUBLE PRECISION, d124 DOUBLE PRECISION, d125 DOUBLE PRECISION, d126 DOUBLE PRECISION, d127 DOUBLE PRECISION, d128 DOUBLE PRECISION, d129 DOUBLE PRECISION, d130 DOUBLE PRECISION, d131 DOUBLE PRECISION, d132 DOUBLE PRECISION, d133 DOUBLE PRECISION, d134 DOUBLE PRECISION, d135 DOUBLE PRECISION, d136 DOUBLE 
PRECISION, d137 DOUBLE PRECISION, d138 DOUBLE PRECISION, d139 DOUBLE PRECISION, d140 DOUBLE PRECISION, d141 DOUBLE PRECISION, d142 DOUBLE PRECISION, d143 DOUBLE PRECISION, d144 DOUBLE PRECISION, d145 DOUBLE PRECISION, d146 DOUBLE PRECISION, d147 DOUBLE PRECISION, d148 DOUBLE PRECISION, d149 DOUBLE PRECISION); 2 | CREATE TABLE train (date_time TIMESTAMP, site_name INT, posa_continent INT, user_location_country INT, user_location_region INT, user_location_city INT, orig_destination_distance DOUBLE PRECISION, user_id INT, is_mobile INT, is_package INT, channel INT, srch_ci VARCHAR, srch_co VARCHAR, srch_adults_cnt INT, srch_children_cnt INT, srch_rm_cnt INT, srch_destination_id INT, srch_destination_type_id INT, is_booking INT, cnt INT, hotel_continent INT, hotel_country INT, hotel_market INT, hotel_cluster INT); 3 | -------------------------------------------------------------------------------- /others/foursquare-location-matching/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/foursquare-location-matching/main.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/foursquare-location-matching/pairs.csv 3 | -------------------------------------------------------------------------------- /others/foursquare-location-matching/tables/foursquare-location-matching_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE main (id VARCHAR, name VARCHAR, latitude VARCHAR, longitude VARCHAR, address VARCHAR, city VARCHAR, state VARCHAR, zip VARCHAR, country VARCHAR, url VARCHAR, phone VARCHAR, categories VARCHAR, point_of_interest VARCHAR); 2 | CREATE TABLE pairs (id_1 VARCHAR, name_1 VARCHAR, latitude_1 VARCHAR, longitude_1 VARCHAR, address_1 VARCHAR, city_1 VARCHAR, state_1 VARCHAR, zip_1 VARCHAR, country_1 VARCHAR, url_1 VARCHAR, phone_1 VARCHAR, categories_1 VARCHAR, id_2 VARCHAR, name_2 VARCHAR, latitude_2 
VARCHAR, longitude_2 VARCHAR, address_2 VARCHAR, city_2 VARCHAR, state_2 VARCHAR, zip_2 VARCHAR, country_2 VARCHAR, url_2 VARCHAR, phone_2 VARCHAR, categories_2 VARCHAR, match VARCHAR); 3 | -------------------------------------------------------------------------------- /others/korean-baseball-pitching-data-1982-2021/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/korean-baseball-pitching-data-1982-2021/kbopitchingdata.csv 2 | -------------------------------------------------------------------------------- /others/korean-baseball-pitching-data-1982-2021/tables/korean-baseball-pitching-data-1982-2021_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE kbopitchingdata 2 | ( 3 | id INT, 4 | year VARCHAR, 5 | team VARCHAR, 6 | average_age DOUBLE PRECISION, 7 | runs_per_game DOUBLE PRECISION, 8 | wins INT, 9 | losses INT, 10 | win_loss_percentage DOUBLE PRECISION, 11 | ERA DOUBLE PRECISION, 12 | run_average_9 DOUBLE PRECISION, 13 | games INT, 14 | games_started INT, 15 | games_finished INT, 16 | complete_game INT, 17 | shutouts INT, 18 | saves INT, 19 | innings_pitched DOUBLE PRECISION, 20 | hits INT, 21 | runs INT, 22 | earned_runs INT, 23 | home_runs INT, 24 | walks INT, 25 | intentional_walks INT, 26 | strikeouts INT, 27 | hit_batter INT, 28 | balks INT, 29 | wild_pitches INT, 30 | batters_faced INT, 31 | WHIP DOUBLE PRECISION, 32 | hits_9 DOUBLE PRECISION, 33 | homeruns_9 DOUBLE PRECISION, 34 | walks_9 DOUBLE PRECISION, 35 | strikeouts_9 DOUBLE PRECISION, 36 | strikeout_walk DOUBLE PRECISION 37 | ); 38 | -------------------------------------------------------------------------------- /others/sf-crime/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/sf-crime/train.csv 2 | 
-------------------------------------------------------------------------------- /others/sf-crime/tables/sf-crime_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE train (id INT, Dates TIMESTAMP, Category VARCHAR, Descript VARCHAR, DayOfWeek VARCHAR, PdDistrict VARCHAR, Resolution VARCHAR, Address VARCHAR, X DOUBLE PRECISION, Y DOUBLE PRECISION); 2 | -------------------------------------------------------------------------------- /others/talkingdata-mobile-user-demographics/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/talkingdata-mobile-user-demographics/app_events.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/talkingdata-mobile-user-demographics/app_labels.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/talkingdata-mobile-user-demographics/events.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/talkingdata-mobile-user-demographics/gender_age_train.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/talkingdata-mobile-user-demographics/label_categories.csv 6 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/talkingdata-mobile-user-demographics/phone_brand_device_model.csv 7 | -------------------------------------------------------------------------------- /others/talkingdata-mobile-user-demographics/tables/talkingdata-mobile-user-demographics_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE app_events 2 | ( 3 | event_id INT, 4 | app_id BIGINT, 5 | is_installed INT, 6 | is_active INT 7 | ); 8 | CREATE TABLE app_labels 9 | ( 10 | app_id BIGINT, 11 | label_id INT 12 | ); 13 | CREATE TABLE events 14 | ( 15 | event_id INT, 16 | device_id BIGINT, 17 | timestamp TIMESTAMP, 18 | longitude DOUBLE PRECISION, 19 | latitude DOUBLE PRECISION 20 | ); 21 | CREATE TABLE gender_age_train 22 | ( 23 | device_id 
BIGINT, 24 | gender VARCHAR, 25 | age INT, 26 | group0 VARCHAR 27 | ); 28 | CREATE TABLE label_categories 29 | ( 30 | label_id INT, 31 | category VARCHAR 32 | ); 33 | CREATE TABLE phone_brand_device_model 34 | ( 35 | device_id BIGINT, 36 | phone_brand VARCHAR, 37 | device_model VARCHAR 38 | ); 39 | -------------------------------------------------------------------------------- /others/unimelb/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/others/unimelb/unimelb_training.csv 2 | -------------------------------------------------------------------------------- /retails/competitive-data-science-predict-future-sales/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/competitive-data-science-predict-future-sales/item_categories.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/competitive-data-science-predict-future-sales/items.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/competitive-data-science-predict-future-sales/sales_train.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/competitive-data-science-predict-future-sales/shops.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/competitive-data-science-predict-future-sales/test.csv 6 | -------------------------------------------------------------------------------- /retails/competitive-data-science-predict-future-sales/queries/competitive-data-science-predict-future-sales_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `date` as date_1, 7 | `date` as sales_train_date_original_0, 8 | `item_cnt_day` as sales_train_item_cnt_day_original_2, 9 | `date_block_num` as sales_train_date_block_num_original_8, 10 | `item_id` as 
sales_train_item_id_original_9, 11 | `item_price` as sales_train_item_price_original_10, 12 | case when !isnull(at(`date_block_num`, 0)) over sales_train_item_id_date_0s_2d_100 then count_where(`date_block_num`, `date_block_num` = at(`date_block_num`, 0)) over sales_train_item_id_date_0s_2d_100 else null end as sales_train_date_block_num_window_count_11, 13 | case when !isnull(at(`date_block_num`, 0)) over sales_train_item_id_date_0s_5d_200 then count_where(`date_block_num`, `date_block_num` = at(`date_block_num`, 0)) over sales_train_item_id_date_0s_5d_200 else null end as sales_train_date_block_num_window_count_12, 14 | case when !isnull(at(`item_id`, 0)) over sales_train_date_block_num_date_0s_64d_200 then count_where(`item_id`, `item_id` = at(`item_id`, 0)) over sales_train_date_block_num_date_0s_64d_200 else null end as sales_train_item_id_window_count_13, 15 | sum(`item_price`) over sales_train_item_id_date_0s_2d_200 as sales_train_item_price_window_sum_14, 16 | `item_id` as sales_train_item_id_combine_15, 17 | `item_id` as sales_train_item_id_combine_16, 18 | sum(`item_price`) over sales_train_item_id_date_0s_5d_200 as sales_train_item_price_window_sum_17, 19 | min(`item_price`) over sales_train_item_id_date_0s_2d_200 as sales_train_item_price_window_min_18, 20 | min(`item_price`) over sales_train_item_id_date_0s_5d_200 as sales_train_item_price_window_min_19, 21 | min(`item_price`) over sales_train_date_block_num_date_0s_64d_200 as sales_train_item_price_window_min_20, 22 | avg(`item_price`) over sales_train_date_block_num_date_0s_32d_100 as sales_train_item_price_window_avg_21, 23 | avg(`item_price`) over sales_train_item_id_date_0s_32d_100 as sales_train_item_price_window_avg_22, 24 | distinct_count(`date_block_num`) over sales_train_item_id_date_0s_64d_200 as sales_train_date_block_num_window_unique_count_23, 25 | distinct_count(`date_block_num`) over sales_train_item_id_date_0_10_ as sales_train_date_block_num_window_unique_count_24, 26 | case when 1 < 
dayofweek(timestamp(`date`)) and dayofweek(timestamp(`date`)) < 7 then 1 else 0 end as sales_train_date_isweekday_25, 27 | hour(timestamp(`date`)) as sales_train_date_hourofday_26, 28 | fz_top1_ratio(`date_block_num`) over sales_train_item_id_date_0s_32d_200 as sales_train_date_block_num_window_top1_ratio_27, 29 | fz_top1_ratio(`item_id`) over sales_train_date_block_num_date_0s_32d_200 as sales_train_item_id_window_top1_ratio_28 30 | from 31 | `sales_train` 32 | window sales_train_item_id_date_0s_2d_100 as (partition by `item_id` order by `date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 33 | sales_train_item_id_date_0s_5d_200 as (partition by `item_id` order by `date` rows_range between 5d open preceding and 0s preceding MAXSIZE 200), 34 | sales_train_date_block_num_date_0s_64d_200 as (partition by `date_block_num` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 35 | sales_train_item_id_date_0s_2d_200 as (partition by `item_id` order by `date` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 36 | sales_train_date_block_num_date_0s_32d_100 as (partition by `date_block_num` order by `date` rows_range between 32d open preceding and 0s preceding MAXSIZE 100), 37 | sales_train_item_id_date_0s_32d_100 as (partition by `item_id` order by `date` rows_range between 32d open preceding and 0s preceding MAXSIZE 100), 38 | sales_train_item_id_date_0s_64d_200 as (partition by `item_id` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 39 | sales_train_item_id_date_0_10_ as (partition by `item_id` order by `date` rows between 10 open preceding and 0 preceding), 40 | sales_train_item_id_date_0s_32d_200 as (partition by `item_id` order by `date` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 41 | sales_train_date_block_num_date_0s_32d_200 as (partition by `date_block_num` order by `date` rows_range between 32d open preceding and 0s preceding 
MAXSIZE 200)) 42 | as out0 43 | last join 44 | ( 45 | select 46 | `sales_train`.`date` as date_3, 47 | `items_item_id`.`item_category_id` as items_item_category_id_multi_direct_3, 48 | `items_item_id`.`item_name` as items_item_name_multi_direct_4, 49 | `shops_shop_id`.`shop_name` as shops_shop_name_multi_direct_5, 50 | `test_item_id`.`ID` as test_ID_multi_direct_6, 51 | `test_item_id`.`shop_id` as test_shop_id_multi_direct_7 52 | from 53 | `sales_train` 54 | last join `items` as `items_item_id` on `sales_train`.`item_id` = `items_item_id`.`item_id` 55 | last join `shops` as `shops_shop_id` on `sales_train`.`shop_id` = `shops_shop_id`.`shop_id` 56 | last join `test` as `test_item_id` on `sales_train`.`item_id` = `test_item_id`.`item_id`) 57 | as out1 58 | on out0.date_1 = out1.date_3 59 | ; -------------------------------------------------------------------------------- /retails/competitive-data-science-predict-future-sales/tables/competitive-data-science-predict-future-sales_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE item_categories 2 | ( 3 | item_category_name VARCHAR, 4 | item_category_id INT 5 | ); 6 | CREATE TABLE items 7 | ( 8 | item_name VARCHAR, 9 | item_id VARCHAR, 10 | item_category_id VARCHAR 11 | ); 12 | CREATE TABLE sales_train 13 | ( 14 | date VARCHAR, 15 | date_block_num INT, 16 | shop_id INT, 17 | item_id INT, 18 | item_price DOUBLE PRECISION, 19 | item_cnt_day DOUBLE PRECISION 20 | ); 21 | CREATE TABLE shops 22 | ( 23 | shop_name VARCHAR, 24 | shop_id INT 25 | ); 26 | CREATE TABLE test 27 | ( 28 | ID INT, 29 | shop_id INT, 30 | item_id INT 31 | ); 32 | -------------------------------------------------------------------------------- /retails/coupon-purchase-prediction/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/coupon-purchase-prediction/coupon_area_train.csv 2 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/coupon-purchase-prediction/coupon_detail_train.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/coupon-purchase-prediction/coupon_list_train.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/coupon-purchase-prediction/coupon_visit_train.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/coupon-purchase-prediction/prefecture_locations.csv 6 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/coupon-purchase-prediction/user_list.csv 7 | -------------------------------------------------------------------------------- /retails/coupon-purchase-prediction/tables/coupon-purchase-prediction_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE coupon_area_train (SMALL_AREA_NAME VARCHAR, PREF_NAME VARCHAR, COUPON_ID_hash VARCHAR); 2 | CREATE TABLE coupon_detail_train (ITEM_COUNT INT, I_DATE TIMESTAMP, SMALL_AREA_NAME VARCHAR, PURCHASEID_hash VARCHAR, USER_ID_hash VARCHAR, COUPON_ID_hash VARCHAR); 3 | CREATE TABLE coupon_list_train (CAPSULE_TEXT VARCHAR, GENRE_NAME VARCHAR, PRICE_RATE INT, CATALOG_PRICE INT, DISCOUNT_PRICE INT, DISPFROM TIMESTAMP, DISPEND TIMESTAMP, DISPPERIOD INT, VALIDFROM VARCHAR, VALIDEND VARCHAR, VALIDPERIOD VARCHAR, USABLE_DATE_MON VARCHAR, USABLE_DATE_TUE VARCHAR, USABLE_DATE_WED VARCHAR, USABLE_DATE_THU VARCHAR, USABLE_DATE_FRI VARCHAR, USABLE_DATE_SAT VARCHAR, USABLE_DATE_SUN VARCHAR, USABLE_DATE_HOLIDAY VARCHAR, USABLE_DATE_BEFORE_HOLIDAY VARCHAR, large_area_name VARCHAR, ken_name VARCHAR, small_area_name VARCHAR, COUPON_ID_hash VARCHAR); 4 | CREATE TABLE coupon_visit_train (PURCHASE_FLG INT, I_DATE TIMESTAMP, PAGE_SERIAL INT, REFERRER_hash VARCHAR, VIEW_COUPON_ID_hash VARCHAR, USER_ID_hash VARCHAR, SESSION_ID_hash VARCHAR, PURCHASEID_hash VARCHAR); 5 | CREATE TABLE prefecture_locations (PREF_NAME VARCHAR, PREFECTUAL_OFFICE VARCHAR, LATITUDE DOUBLE PRECISION, LONGITUDE DOUBLE PRECISION); 6 | CREATE TABLE 
user_list (REG_DATE TIMESTAMP, SEX_ID VARCHAR, AGE INT, WITHDRAW_DATE VARCHAR, PREF_NAME VARCHAR, USER_ID_hash VARCHAR); 7 | -------------------------------------------------------------------------------- /retails/ecommerce-customerssales-record/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/ecommerce-customerssales-record/Sales.csv 2 | -------------------------------------------------------------------------------- /retails/ecommerce-customerssales-record/tables/ecommerce-customerssales-record_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Sales (index INT, CustomerID INT, TOTAL_ORDERS INT, REVENUE DOUBLE PRECISION, AVERAGE_ORDER_VALUE DOUBLE PRECISION, CARRIAGE_REVENUE DOUBLE PRECISION, AVERAGESHIPPING DOUBLE PRECISION, FIRST_ORDER_DATE TIMESTAMP, LATEST_ORDER_DATE TIMESTAMP, AVGDAYSBETWEENORDERS DOUBLE PRECISION, DAYSSINCELASTORDER INT, MONDAY_ORDERS INT, TUESDAY_ORDERS INT, WEDNESDAY_ORDERS INT, THURSDAY_ORDERS INT, FRIDAY_ORDERS INT, SATURDAY_ORDERS INT, SUNDAY_ORDERS INT, MONDAY_REVENUE DOUBLE PRECISION, TUESDAY_REVENUE DOUBLE PRECISION, WEDNESDAY_REVENUE DOUBLE PRECISION, THURSDAY_REVENUE DOUBLE PRECISION, FRIDAY_REVENUE DOUBLE PRECISION, SATURDAY_REVENUE DOUBLE PRECISION, SUNDAY_REVENUE DOUBLE PRECISION, WEEK1_DAY01_DAY07_ORDERS INT, WEEK2_DAY08_DAY15_ORDERS INT, WEEK3_DAY16_DAY23_ORDERS INT, WEEK4_DAY24_DAY31_ORDERS INT, WEEK1_DAY01_DAY07_REVENUE DOUBLE PRECISION, WEEK2_DAY08_DAY15_REVENUE DOUBLE PRECISION, WEEK3_DAY16_DAY23_REVENUE DOUBLE PRECISION, WEEK4_DAY24_DAY31_REVENUE DOUBLE PRECISION, TIME_0000_0600_ORDERS INT, TIME_0601_1200_ORDERS INT, TIME_1200_1800_ORDERS INT, TIME_1801_2359_ORDERS INT, TIME_0000_0600_REVENUE DOUBLE PRECISION, TIME_0601_1200_REVENUE DOUBLE PRECISION, TIME_1200_1800_REVENUE DOUBLE PRECISION, TIME_1801_2359_REVENUE DOUBLE PRECISION); 2 | 
-------------------------------------------------------------------------------- /retails/favorita-grocery-sales-forecasting/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/favorita-grocery-sales-forecasting/holidays_events.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/favorita-grocery-sales-forecasting/items.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/favorita-grocery-sales-forecasting/oil.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/favorita-grocery-sales-forecasting/stores.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/favorita-grocery-sales-forecasting/train.csv 6 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/favorita-grocery-sales-forecasting/transactions.csv 7 | -------------------------------------------------------------------------------- /retails/favorita-grocery-sales-forecasting/queries/favorita-grocery-sales-forecasting_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `id` as id_1, 7 | `date` as train_date_original_0, 8 | `id` as train_id_original_1, 9 | `unit_sales` as train_unit_sales_original_2, 10 | `item_nbr` as train_item_nbr_original_12, 11 | `store_nbr` as train_store_nbr_original_13, 12 | `item_nbr` as train_item_nbr_combine_14, 13 | `store_nbr` as train_store_nbr_combine_14, 14 | `item_nbr` as train_item_nbr_combine_15, 15 | `item_nbr` as train_item_nbr_combine_16, 16 | `item_nbr` as train_item_nbr_combine_17, 17 | `store_nbr` as train_store_nbr_combine_18, 18 | `store_nbr` as train_store_nbr_combine_21, 19 | `store_nbr` as train_store_nbr_combine_24, 20 | case when !isnull(at(`store_nbr`, 0)) over train_item_nbr_date_0s_2d_100 then count_where(`store_nbr`, `store_nbr` = at(`store_nbr`, 0)) over train_item_nbr_date_0s_2d_100 else null end 
as train_store_nbr_window_count_25, 21 | case when !isnull(at(`store_nbr`, 0)) over train_item_nbr_date_0s_7d_100 then count_where(`store_nbr`, `store_nbr` = at(`store_nbr`, 0)) over train_item_nbr_date_0s_7d_100 else null end as train_store_nbr_window_count_26, 22 | distinct_count(`item_nbr`) over train_store_nbr_date_0_10_ as train_item_nbr_window_unique_count_28, 23 | distinct_count(`store_nbr`) over train_item_nbr_date_0_10_ as train_store_nbr_window_unique_count_29, 24 | case when !isnull(at(`item_nbr`, 0)) over train_store_nbr_date_0s_14d_100 then count_where(`item_nbr`, `item_nbr` = at(`item_nbr`, 0)) over train_store_nbr_date_0s_14d_100 else null end as train_item_nbr_window_count_30, 25 | fz_top1_ratio(`item_nbr`) over train_store_nbr_date_0_10_ as train_item_nbr_window_top1_ratio_31 26 | from 27 | `train` 28 | window train_item_nbr_date_0s_2d_100 as (partition by `item_nbr` order by `date` rows_range between 2d open preceding and 0s preceding MAXSIZE 100), 29 | train_item_nbr_date_0s_7d_100 as (partition by `item_nbr` order by `date` rows_range between 7d open preceding and 0s preceding MAXSIZE 100), 30 | train_store_nbr_date_0_10_ as (partition by `store_nbr` order by `date` rows between 10 open preceding and 0 preceding), 31 | train_item_nbr_date_0_10_ as (partition by `item_nbr` order by `date` rows between 10 open preceding and 0 preceding), 32 | train_store_nbr_date_0s_14d_100 as (partition by `store_nbr` order by `date` rows_range between 14d open preceding and 0s preceding MAXSIZE 100)) 33 | as out0 34 | last join 35 | ( 36 | select 37 | `train`.`id` as id_4, 38 | `items_item_nbr`.`class` as items_class_multi_direct_3, 39 | `items_item_nbr`.`family` as items_family_multi_direct_4, 40 | `items_item_nbr`.`perishable` as items_perishable_multi_direct_5, 41 | `stores_store_nbr`.`city` as stores_city_multi_direct_6, 42 | `stores_store_nbr`.`cluster` as stores_cluster_multi_direct_7, 43 | `stores_store_nbr`.`state` as stores_state_multi_direct_8, 44 | 
`stores_store_nbr`.`type` as stores_type_multi_direct_9 45 | from 46 | `train` 47 | last join `items` as `items_item_nbr` on `train`.`item_nbr` = `items_item_nbr`.`item_nbr` 48 | last join `stores` as `stores_store_nbr` on `train`.`store_nbr` = `stores_store_nbr`.`store_nbr`) 49 | as out1 50 | on out0.id_1 = out1.id_4 51 | last join 52 | ( 53 | select 54 | `id` as id_11, 55 | fz_topn_frequency(`transactions`, 3) over transactions_store_nbr_date_5s_64d_100 as transactions_transactions_multi_top3frequency_10, 56 | distinct_count(`transactions`) over transactions_store_nbr_date_5s_64d_100 as transactions_transactions_multi_unique_count_11 57 | from 58 | (select `date` as `date`, `store_nbr` as `store_nbr`, int(0) as `transactions`, id from `train`) 59 | window transactions_store_nbr_date_5s_64d_100 as ( 60 | UNION (select `date`, `store_nbr`, `transactions`, int(0) as id from `transactions`) partition by `store_nbr` order by `date` rows_range between 64d open preceding and 5s preceding MAXSIZE 100 INSTANCE_NOT_IN_WINDOW)) 61 | as out2 62 | on out0.id_1 = out2.id_11 63 | ; -------------------------------------------------------------------------------- /retails/favorita-grocery-sales-forecasting/tables/favorita-grocery-sales-forecasting_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE holidays_events (date TIMESTAMP, type VARCHAR, locale VARCHAR, locale_name VARCHAR, description VARCHAR, transferred VARCHAR); 2 | CREATE TABLE items (item_nbr INT, family VARCHAR, class INT, perishable INT); 3 | CREATE TABLE oil (date TIMESTAMP, dcoilwtico DOUBLE PRECISION); 4 | CREATE TABLE stores (store_nbr INT, city VARCHAR, state VARCHAR, type VARCHAR, cluster INT); 5 | CREATE TABLE train (id INT, date TIMESTAMP, store_nbr INT, item_nbr INT, unit_sales DOUBLE PRECISION, onpromotion VARCHAR); 6 | CREATE TABLE transactions (date TIMESTAMP, store_nbr INT, transactions INT); 7 | 
-------------------------------------------------------------------------------- /retails/goods/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_0.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_1.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_2.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_3.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_4.csv 6 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_5.csv 7 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_6.csv 8 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_7.csv 9 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_8.csv 10 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_9.csv 11 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_10.csv 12 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_11.csv 13 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_12.csv 14 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_13.csv 15 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_14.csv 16 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_15.csv 17 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_16.csv 18 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_17.csv 19 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_18.csv 20 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_19.csv 21 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_20.csv 22 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_21.csv 23 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_22.csv 24 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_23.csv 25 
| https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_24.csv 26 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_25.csv 27 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_26.csv 28 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_27.csv 29 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_28.csv 30 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_29.csv 31 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_30.csv 32 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_31.csv 33 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_32.csv 34 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_33.csv 35 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_34.csv 36 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_35.csv 37 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_36.csv 38 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_37.csv 39 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_38.csv 40 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_39.csv 41 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/goods/table_40.csv -------------------------------------------------------------------------------- /retails/goods/tables/CreateIndexes.sql: -------------------------------------------------------------------------------- 1 | -- 创建索引 2 | 3 | 4 | CREATE INDEX idx_546055_primary ON table_36 (column_0, column_4, column_2); 5 | 6 | CREATE INDEX idx_546061_primary ON table_5 (column_0, column_1); 7 | 8 | CREATE INDEX idx_546227_primary ON table_19 (column_0); 9 | 10 | CREATE INDEX idx_546250_primary ON table_4 (column_0, column_1); 11 | 12 | CREATE INDEX idx_546262_primary ON table_9 (column_0, column_1); 13 | 14 | CREATE INDEX idx_546256_primary ON table_12 (column_0, column_1); 15 | 16 | CREATE INDEX idx_546112_primary ON 
table_24 (column_0); 17 | 18 | CREATE INDEX idx_546242_primary ON table_13 (column_19, column_0, column_4, column_12); 19 | 20 | CREATE INDEX idx_546221_primary ON table_3 (column_0); 21 | 22 | CREATE INDEX idx_546172_primary ON table_22 (column_0); 23 | 24 | CREATE INDEX idx_546211_primary ON table_35 (column_0); 25 | 26 | CREATE INDEX idx_546165_primary ON table_39 (column_0); 27 | 28 | CREATE INDEX idx_546207_primary ON table_23 (column_0); 29 | 30 | CREATE INDEX idx_546048_primary ON table_28 (column_0, column_1, column_2); 31 | 32 | CREATE INDEX idx_546074_primary ON table_34 (column_0, column_1); 33 | 34 | CREATE INDEX idx_546137_primary ON table_10 (column_0); 35 | 36 | CREATE INDEX idx_546028_primary ON table_15 (column_0, column_1); 37 | 38 | CREATE INDEX idx_546004_primary ON table_17 (column_0); 39 | 40 | CREATE INDEX idx_546010_primary ON table_32 (column_0); 41 | 42 | CREATE INDEX idx_546130_primary ON table_2 (column_0); 43 | 44 | CREATE INDEX idx_546187_primary ON table_33 (column_0); 45 | 46 | CREATE INDEX idx_546041_primary ON table_8 (column_0, column_1, column_2); 47 | 48 | CREATE INDEX idx_546183_primary ON table_1 (column_0); 49 | 50 | CREATE INDEX idx_546141_primary ON table_20 (column_0, column_2); 51 | 52 | CREATE INDEX idx_546016_primary ON table_14 (column_0); 53 | 54 | CREATE INDEX idx_546268_primary ON table_16 (column_0); 55 | 56 | CREATE INDEX idx_546101_primary ON table_38 (column_0); 57 | 58 | CREATE INDEX idx_546022_primary ON table_27 (column_0); 59 | 60 | CREATE INDEX idx_546265_primary ON table_18 (column_0, column_1); 61 | 62 | CREATE INDEX idx_546119_primary ON table_11 (column_0); 63 | 64 | CREATE INDEX idx_546034_primary ON table_0 (column_0, column_1, column_2); 65 | 66 | CREATE INDEX idx_546108_primary ON table_40 (column_0); 67 | 68 | CREATE INDEX idx_546201_primary ON table_7 (column_0); 69 | 70 | CREATE INDEX idx_546233_primary ON table_37 (column_0); 71 | 72 | CREATE INDEX idx_546194_primary ON table_6 (column_0); 73 | 
74 | CREATE INDEX idx_546218_primary ON table_31 (column_0, column_1); 75 | 76 | CREATE INDEX idx_546088_primary ON table_30 (column_0, column_1); 77 | 78 | CREATE INDEX idx_546152_primary ON table_26 (column_0, column_1); 79 | 80 | CREATE INDEX idx_546179_primary ON table_21 (column_0); 81 | 82 | CREATE INDEX idx_546126_primary ON table_25 (column_0); 83 | 84 | CREATE INDEX idx_545998_primary ON table_29 (column_0); 85 | -------------------------------------------------------------------------------- /retails/goods/tables/CreatePrimaryKeys.sql: -------------------------------------------------------------------------------- 1 | -- 添加主键的命令 2 | 3 | 4 | ALTER TABLE table_36 5 | ADD CONSTRAINT idx_546055_primary 6 | PRIMARY KEY (column_0, column_4, column_2); 7 | 8 | 9 | ALTER TABLE table_5 10 | ADD CONSTRAINT idx_546061_primary 11 | PRIMARY KEY (column_0, column_1); 12 | 13 | 14 | ALTER TABLE table_19 15 | ADD CONSTRAINT idx_546227_primary 16 | PRIMARY KEY (column_0); 17 | 18 | 19 | ALTER TABLE table_4 20 | ADD CONSTRAINT idx_546250_primary 21 | PRIMARY KEY (column_0, column_1); 22 | 23 | 24 | ALTER TABLE table_9 25 | ADD CONSTRAINT idx_546262_primary 26 | PRIMARY KEY (column_0, column_1); 27 | 28 | 29 | ALTER TABLE table_12 30 | ADD CONSTRAINT idx_546256_primary 31 | PRIMARY KEY (column_0, column_1); 32 | 33 | 34 | ALTER TABLE table_24 35 | ADD CONSTRAINT idx_546112_primary 36 | PRIMARY KEY (column_0); 37 | 38 | 39 | ALTER TABLE table_13 40 | ADD CONSTRAINT idx_546242_primary 41 | PRIMARY KEY (column_19, column_0, column_4, column_12); 42 | 43 | 44 | ALTER TABLE table_3 45 | ADD CONSTRAINT idx_546221_primary 46 | PRIMARY KEY (column_0); 47 | 48 | 49 | ALTER TABLE table_22 50 | ADD CONSTRAINT idx_546172_primary 51 | PRIMARY KEY (column_0); 52 | 53 | 54 | ALTER TABLE table_35 55 | ADD CONSTRAINT idx_546211_primary 56 | PRIMARY KEY (column_0); 57 | 58 | 59 | ALTER TABLE table_39 60 | ADD CONSTRAINT idx_546165_primary 61 | PRIMARY KEY (column_0); 62 | 63 | 64 | ALTER 
TABLE table_23 65 | ADD CONSTRAINT idx_546207_primary 66 | PRIMARY KEY (column_0); 67 | 68 | 69 | ALTER TABLE table_28 70 | ADD CONSTRAINT idx_546048_primary 71 | PRIMARY KEY (column_0, column_1, column_2); 72 | 73 | 74 | ALTER TABLE table_34 75 | ADD CONSTRAINT idx_546074_primary 76 | PRIMARY KEY (column_0, column_1); 77 | 78 | 79 | ALTER TABLE table_10 80 | ADD CONSTRAINT idx_546137_primary 81 | PRIMARY KEY (column_0); 82 | 83 | 84 | ALTER TABLE table_15 85 | ADD CONSTRAINT idx_546028_primary 86 | PRIMARY KEY (column_0, column_1); 87 | 88 | 89 | ALTER TABLE table_17 90 | ADD CONSTRAINT idx_546004_primary 91 | PRIMARY KEY (column_0); 92 | 93 | 94 | ALTER TABLE table_32 95 | ADD CONSTRAINT idx_546010_primary 96 | PRIMARY KEY (column_0); 97 | 98 | 99 | ALTER TABLE table_2 100 | ADD CONSTRAINT idx_546130_primary 101 | PRIMARY KEY (column_0); 102 | 103 | 104 | ALTER TABLE table_33 105 | ADD CONSTRAINT idx_546187_primary 106 | PRIMARY KEY (column_0); 107 | 108 | 109 | ALTER TABLE table_8 110 | ADD CONSTRAINT idx_546041_primary 111 | PRIMARY KEY (column_0, column_1, column_2); 112 | 113 | 114 | ALTER TABLE table_1 115 | ADD CONSTRAINT idx_546183_primary 116 | PRIMARY KEY (column_0); 117 | 118 | 119 | ALTER TABLE table_20 120 | ADD CONSTRAINT idx_546141_primary 121 | PRIMARY KEY (column_0, column_2); 122 | 123 | 124 | ALTER TABLE table_14 125 | ADD CONSTRAINT idx_546016_primary 126 | PRIMARY KEY (column_0); 127 | 128 | 129 | ALTER TABLE table_16 130 | ADD CONSTRAINT idx_546268_primary 131 | PRIMARY KEY (column_0); 132 | 133 | 134 | ALTER TABLE table_38 135 | ADD CONSTRAINT idx_546101_primary 136 | PRIMARY KEY (column_0); 137 | 138 | 139 | ALTER TABLE table_27 140 | ADD CONSTRAINT idx_546022_primary 141 | PRIMARY KEY (column_0); 142 | 143 | 144 | ALTER TABLE table_18 145 | ADD CONSTRAINT idx_546265_primary 146 | PRIMARY KEY (column_0, column_1); 147 | 148 | 149 | ALTER TABLE table_11 150 | ADD CONSTRAINT idx_546119_primary 151 | PRIMARY KEY (column_0); 152 | 153 | 154 | 
ALTER TABLE table_0 155 | ADD CONSTRAINT idx_546034_primary 156 | PRIMARY KEY (column_0, column_1, column_2); 157 | 158 | 159 | ALTER TABLE table_40 160 | ADD CONSTRAINT idx_546108_primary 161 | PRIMARY KEY (column_0); 162 | 163 | 164 | ALTER TABLE table_7 165 | ADD CONSTRAINT idx_546201_primary 166 | PRIMARY KEY (column_0); 167 | 168 | 169 | ALTER TABLE table_37 170 | ADD CONSTRAINT idx_546233_primary 171 | PRIMARY KEY (column_0); 172 | 173 | 174 | ALTER TABLE table_6 175 | ADD CONSTRAINT idx_546194_primary 176 | PRIMARY KEY (column_0); 177 | 178 | 179 | ALTER TABLE table_31 180 | ADD CONSTRAINT idx_546218_primary 181 | PRIMARY KEY (column_0, column_1); 182 | 183 | 184 | ALTER TABLE table_30 185 | ADD CONSTRAINT idx_546088_primary 186 | PRIMARY KEY (column_0, column_1); 187 | 188 | 189 | ALTER TABLE table_26 190 | ADD CONSTRAINT idx_546152_primary 191 | PRIMARY KEY (column_0, column_1); 192 | 193 | 194 | ALTER TABLE table_21 195 | ADD CONSTRAINT idx_546179_primary 196 | PRIMARY KEY (column_0); 197 | 198 | 199 | ALTER TABLE table_25 200 | ADD CONSTRAINT idx_546126_primary 201 | PRIMARY KEY (column_0); 202 | 203 | 204 | ALTER TABLE table_29 205 | ADD CONSTRAINT idx_545998_primary 206 | PRIMARY KEY (column_0); 207 | 208 | -------------------------------------------------------------------------------- /retails/grupo-bimbo-inventory-demand/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/grupo-bimbo-inventory-demand/cliente_tabla.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/grupo-bimbo-inventory-demand/producto_tabla.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/grupo-bimbo-inventory-demand/town_state.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/grupo-bimbo-inventory-demand/train.csv 5 | -------------------------------------------------------------------------------- 
/retails/grupo-bimbo-inventory-demand/queries/grupo-bimbo-inventory-demand_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `id` as id_1, 7 | `Semana` as train_Semana_original_0, 8 | `id` as train_id_original_1, 9 | `Demanda_uni_equil` as train_Demanda_uni_equil_original_2, 10 | `Agencia_ID` as train_Agencia_ID_original_7, 11 | `Canal_ID` as train_Canal_ID_original_8, 12 | `Cliente_ID` as train_Cliente_ID_original_9, 13 | `Dev_proxima` as train_Dev_proxima_original_10, 14 | `Dev_uni_proxima` as train_Dev_uni_proxima_original_11, 15 | `Ruta_SAK` as train_Ruta_SAK_original_12, 16 | `Venta_hoy` as train_Venta_hoy_original_13, 17 | `Venta_uni_hoy` as train_Venta_uni_hoy_original_14, 18 | min(`Venta_hoy`) over train_Venta_uni_hoy_Semana_0s_1h_200 as train_Venta_hoy_window_min_15, 19 | max(`Venta_hoy`) over train_Venta_uni_hoy_Semana_0s_1h_200 as train_Venta_hoy_window_max_16, 20 | avg(`Venta_hoy`) over train_Venta_uni_hoy_Semana_0s_1h_200 as train_Venta_hoy_window_avg_17, 21 | min(`Venta_hoy`) over train_Venta_uni_hoy_Semana_0_10_ as train_Venta_hoy_window_min_18, 22 | max(`Venta_hoy`) over train_Venta_uni_hoy_Semana_0_10_ as train_Venta_hoy_window_max_19, 23 | avg(`Venta_hoy`) over train_Venta_uni_hoy_Semana_0_10_ as train_Venta_hoy_window_avg_20, 24 | distinct_count(`Ruta_SAK`) over train_Venta_uni_hoy_Semana_0s_1h_200 as train_Ruta_SAK_window_unique_count_21, 25 | distinct_count(`Agencia_ID`) over train_Venta_uni_hoy_Semana_0s_1h_200 as train_Agencia_ID_window_unique_count_22, 26 | distinct_count(`Cliente_ID`) over train_Venta_uni_hoy_Semana_0s_1h_200 as train_Cliente_ID_window_unique_count_23, 27 | `Dev_proxima` as train_Dev_proxima_divide_24, 28 | `Venta_hoy` as train_Venta_hoy_divide_24, 29 | `Venta_hoy` as train_Venta_hoy_divide_25, 30 | `Dev_proxima` as train_Dev_proxima_divide_25, 31 | `Canal_ID` as train_Canal_ID_combine_26, 
32 | `Dev_uni_proxima` as train_Dev_uni_proxima_combine_26, 33 | sum(`Venta_hoy`) over train_Venta_uni_hoy_Semana_0s_1h_100 as train_Venta_hoy_window_sum_27, 34 | `Venta_hoy` as train_Venta_hoy_multiply_28, 35 | `Dev_proxima` as train_Dev_proxima_multiply_28, 36 | sum(`Venta_hoy`) over train_Venta_uni_hoy_Semana_0_10_ as train_Venta_hoy_window_sum_29, 37 | case when !isnull(at(`Dev_uni_proxima`, 0)) over train_Canal_ID_Semana_0s_1h_100 then count_where(`Dev_uni_proxima`, `Dev_uni_proxima` = at(`Dev_uni_proxima`, 0)) over train_Canal_ID_Semana_0s_1h_100 else null end as train_Dev_uni_proxima_window_count_30, 38 | log(`Venta_hoy`) as train_Venta_hoy_log_31, 39 | case when !isnull(at(`Venta_uni_hoy`, 0)) over train_Dev_uni_proxima_Semana_0s_1h_200 then count_where(`Venta_uni_hoy`, `Venta_uni_hoy` = at(`Venta_uni_hoy`, 0)) over train_Dev_uni_proxima_Semana_0s_1h_200 else null end as train_Venta_uni_hoy_window_count_32 40 | from 41 | `train` 42 | window train_Venta_uni_hoy_Semana_0s_1h_200 as (partition by `Venta_uni_hoy` order by `Semana` rows_range between 1h open preceding and 0s preceding MAXSIZE 200), 43 | train_Venta_uni_hoy_Semana_0_10_ as (partition by `Venta_uni_hoy` order by `Semana` rows between 10 open preceding and 0 preceding), 44 | train_Venta_uni_hoy_Semana_0s_1h_100 as (partition by `Venta_uni_hoy` order by `Semana` rows_range between 1h open preceding and 0s preceding MAXSIZE 100), 45 | train_Canal_ID_Semana_0s_1h_100 as (partition by `Canal_ID` order by `Semana` rows_range between 1h open preceding and 0s preceding MAXSIZE 100), 46 | train_Dev_uni_proxima_Semana_0s_1h_200 as (partition by `Dev_uni_proxima` order by `Semana` rows_range between 1h open preceding and 0s preceding MAXSIZE 200)) 47 | as out0 48 | last join 49 | ( 50 | select 51 | `train`.`id` as id_4, 52 | `cliente_tabla_Cliente_ID`.`NombreCliente` as cliente_tabla_NombreCliente_multi_direct_3, 53 | `producto_tabla_Producto_ID`.`NombreProducto` as 
producto_tabla_NombreProducto_multi_direct_4, 54 | `town_state_Agencia_ID`.`State` as town_state_State_multi_direct_5, 55 | `town_state_Agencia_ID`.`Town` as town_state_Town_multi_direct_6 56 | from 57 | `train` 58 | last join `cliente_tabla` as `cliente_tabla_Cliente_ID` on `train`.`Cliente_ID` = `cliente_tabla_Cliente_ID`.`Cliente_ID` 59 | last join `producto_tabla` as `producto_tabla_Producto_ID` on `train`.`Producto_ID` = `producto_tabla_Producto_ID`.`Producto_ID` 60 | last join `town_state` as `town_state_Agencia_ID` on `train`.`Agencia_ID` = `town_state_Agencia_ID`.`Agencia_ID`) 61 | as out1 62 | on out0.id_1 = out1.id_4 63 | ; -------------------------------------------------------------------------------- /retails/grupo-bimbo-inventory-demand/tables/grupo-bimbo-inventory-demand_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE cliente_tabla 2 | ( 3 | Cliente_ID INT, 4 | NombreCliente VARCHAR 5 | ); 6 | CREATE TABLE producto_tabla 7 | ( 8 | Producto_ID INT, 9 | NombreProducto VARCHAR 10 | ); 11 | CREATE TABLE town_state 12 | ( 13 | Agencia_ID INT, 14 | Town VARCHAR, 15 | State VARCHAR 16 | ); 17 | CREATE TABLE train 18 | ( 19 | id INT, 20 | Semana VARCHAR, 21 | Agencia_ID INT, 22 | Canal_ID INT, 23 | Ruta_SAK INT, 24 | Cliente_ID INT, 25 | Producto_ID INT, 26 | Venta_uni_hoy INT, 27 | Venta_hoy DOUBLE PRECISION, 28 | Dev_uni_proxima INT, 29 | Dev_proxima DOUBLE PRECISION, 30 | Demanda_uni_equil INT 31 | ); 32 | -------------------------------------------------------------------------------- /retails/m5-forecasting-accuracy/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/m5-forecasting-accuracy/calendar.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/m5-forecasting-accuracy/sales_train_evaluation.csv 3 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/m5-forecasting-accuracy/sell_prices.csv 4 | -------------------------------------------------------------------------------- /retails/m5-forecasting-accuracy/queries/m5-forecasting-accuracy_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `d` as d_1, 7 | `date` as calendar_date_original_0, 8 | `d` as calendar_d_original_1, 9 | `event_name_1` as calendar_event_name_1_original_5, 10 | `event_name_2` as calendar_event_name_2_original_6, 11 | `event_type_1` as calendar_event_type_1_original_7, 12 | `event_type_2` as calendar_event_type_2_original_8, 13 | `month` as calendar_month_original_9, 14 | `snap_CA` as calendar_snap_CA_original_10, 15 | `snap_TX` as calendar_snap_TX_original_11, 16 | `snap_WI` as calendar_snap_WI_original_12, 17 | `wday` as calendar_wday_original_13, 18 | `wm_yr_wk` as calendar_wm_yr_wk_original_14, 19 | `year` as calendar_year_original_15, 20 | `snap_CA` as calendar_snap_CA_combine_16, 21 | `month` as calendar_month_combine_16, 22 | `event_type_2` as calendar_event_type_2_combine_17, 23 | `month` as calendar_month_combine_17, 24 | `month` as calendar_month_combine_18, 25 | `snap_CA` as calendar_snap_CA_combine_19, 26 | case when !isnull(at(`month`, 0)) over calendar_wday_date_0s_32d_200 then count_where(`month`, `month` = at(`month`, 0)) over calendar_wday_date_0s_32d_200 else null end as calendar_month_window_count_20, 27 | fz_top1_ratio(`snap_TX`) over calendar_event_name_2_date_0s_64d_100 as calendar_snap_TX_window_top1_ratio_21, 28 | fz_top1_ratio(`snap_TX`) over calendar_event_name_2_date_0s_14d_100 as calendar_snap_TX_window_top1_ratio_22, 29 | fz_top1_ratio(`snap_WI`) over calendar_event_name_2_date_0s_64d_100 as calendar_snap_WI_window_top1_ratio_23, 30 | fz_top1_ratio(`event_type_1`) over calendar_event_name_1_date_0s_64d_100 as 
calendar_event_type_1_window_top1_ratio_24, 31 | `snap_CA` as calendar_snap_CA_combine_25, 32 | `month` as calendar_month_combine_25, 33 | distinct_count(`event_type_1`) over calendar_snap_CA_date_0s_64d_200 as calendar_event_type_1_window_unique_count_26, 34 | fz_top1_ratio(`event_type_1`) over calendar_snap_CA_date_0s_64d_100 as calendar_event_type_1_window_top1_ratio_27, 35 | distinct_count(`event_type_1`) over calendar_snap_CA_date_0s_14d_100 as calendar_event_type_1_window_unique_count_28, 36 | fz_top1_ratio(`event_type_1`) over calendar_snap_CA_date_0s_14d_100 as calendar_event_type_1_window_top1_ratio_29, 37 | `snap_CA` as calendar_snap_CA_combine_30, 38 | `snap_TX` as calendar_snap_TX_combine_30, 39 | `wday` as calendar_wday_combine_30, 40 | case when !isnull(at(`month`, 0)) over calendar_event_name_2_date_0s_64d_100 then count_where(`month`, `month` = at(`month`, 0)) over calendar_event_name_2_date_0s_64d_100 else null end as calendar_month_window_count_31, 41 | case when !isnull(at(`month`, 0)) over calendar_event_name_2_date_0s_14d_100 then count_where(`month`, `month` = at(`month`, 0)) over calendar_event_name_2_date_0s_14d_100 else null end as calendar_month_window_count_32, 42 | fz_top1_ratio(`event_type_1`) over calendar_event_name_2_date_0s_64d_100 as calendar_event_type_1_window_top1_ratio_33 43 | from 44 | `calendar` 45 | window calendar_wday_date_0s_32d_200 as (partition by `wday` order by `date` rows_range between 32d open preceding and 0s preceding MAXSIZE 200), 46 | calendar_event_name_2_date_0s_64d_100 as (partition by `event_name_2` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 47 | calendar_event_name_2_date_0s_14d_100 as (partition by `event_name_2` order by `date` rows_range between 14d open preceding and 0s preceding MAXSIZE 100), 48 | calendar_event_name_1_date_0s_64d_100 as (partition by `event_name_1` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 49 | 
calendar_snap_CA_date_0s_64d_200 as (partition by `snap_CA` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 50 | calendar_snap_CA_date_0s_64d_100 as (partition by `snap_CA` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 51 | calendar_snap_CA_date_0s_14d_100 as (partition by `snap_CA` order by `date` rows_range between 14d open preceding and 0s preceding MAXSIZE 100)) 52 | as out0 53 | last join 54 | ( 55 | select 56 | `calendar`.`d` as d_3, 57 | `sell_prices_wm_yr_wk`.`sell_price` as sell_prices_sell_price_multi_direct_2, 58 | `sell_prices_wm_yr_wk`.`item_id` as sell_prices_item_id_multi_direct_3, 59 | `sell_prices_wm_yr_wk`.`store_id` as sell_prices_store_id_multi_direct_4 60 | from 61 | `calendar` 62 | last join `sell_prices` as `sell_prices_wm_yr_wk` on `calendar`.`wm_yr_wk` = `sell_prices_wm_yr_wk`.`wm_yr_wk`) 63 | as out1 64 | on out0.d_1 = out1.d_3 65 | ; -------------------------------------------------------------------------------- /retails/m5-forecasting-uncertainty/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/m5-forecasting-uncertainty/calendar.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/m5-forecasting-uncertainty/sales_train_evaluation.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/m5-forecasting-uncertainty/sell_prices.csv 4 | -------------------------------------------------------------------------------- /retails/m5-forecasting-uncertainty/queries/m5-forecasting-uncertainty_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `d` as d_1, 7 | `date` as calendar_date_original_0, 8 | `d` as calendar_d_original_1, 9 | `event_name_1` as calendar_event_name_1_original_5, 10 | `event_name_2` as 
calendar_event_name_2_original_6, 11 | `event_type_1` as calendar_event_type_1_original_7, 12 | `event_type_2` as calendar_event_type_2_original_8, 13 | `month` as calendar_month_original_9, 14 | `snap_CA` as calendar_snap_CA_original_10, 15 | `snap_TX` as calendar_snap_TX_original_11, 16 | `snap_WI` as calendar_snap_WI_original_12, 17 | `wday` as calendar_wday_original_13, 18 | `wm_yr_wk` as calendar_wm_yr_wk_original_14, 19 | `year` as calendar_year_original_15, 20 | `snap_CA` as calendar_snap_CA_combine_16, 21 | `month` as calendar_month_combine_16, 22 | `event_type_2` as calendar_event_type_2_combine_17, 23 | `month` as calendar_month_combine_17, 24 | `month` as calendar_month_combine_18, 25 | `snap_CA` as calendar_snap_CA_combine_19, 26 | case when !isnull(at(`month`, 0)) over calendar_wday_date_0s_32d_100 then count_where(`month`, `month` = at(`month`, 0)) over calendar_wday_date_0s_32d_100 else null end as calendar_month_window_count_20, 27 | fz_top1_ratio(`snap_TX`) over calendar_event_name_2_date_0s_64d_200 as calendar_snap_TX_window_top1_ratio_21, 28 | fz_top1_ratio(`snap_TX`) over calendar_event_name_2_date_0s_14d_200 as calendar_snap_TX_window_top1_ratio_22, 29 | fz_top1_ratio(`snap_WI`) over calendar_event_name_2_date_0s_64d_200 as calendar_snap_WI_window_top1_ratio_23, 30 | fz_top1_ratio(`event_type_1`) over calendar_event_name_1_date_0s_64d_100 as calendar_event_type_1_window_top1_ratio_24, 31 | `snap_CA` as calendar_snap_CA_combine_25, 32 | `month` as calendar_month_combine_25, 33 | distinct_count(`event_type_1`) over calendar_snap_CA_date_0s_64d_100 as calendar_event_type_1_window_unique_count_26, 34 | fz_top1_ratio(`event_type_1`) over calendar_snap_CA_date_0s_64d_100 as calendar_event_type_1_window_top1_ratio_27, 35 | distinct_count(`event_type_1`) over calendar_snap_CA_date_0s_14d_100 as calendar_event_type_1_window_unique_count_28, 36 | fz_top1_ratio(`event_type_1`) over calendar_snap_CA_date_0s_14d_100 as 
calendar_event_type_1_window_top1_ratio_29, 37 | `snap_CA` as calendar_snap_CA_combine_30, 38 | `snap_TX` as calendar_snap_TX_combine_30, 39 | `wday` as calendar_wday_combine_30, 40 | case when !isnull(at(`month`, 0)) over calendar_event_name_2_date_0s_64d_200 then count_where(`month`, `month` = at(`month`, 0)) over calendar_event_name_2_date_0s_64d_200 else null end as calendar_month_window_count_31, 41 | case when !isnull(at(`month`, 0)) over calendar_event_name_2_date_0s_14d_200 then count_where(`month`, `month` = at(`month`, 0)) over calendar_event_name_2_date_0s_14d_200 else null end as calendar_month_window_count_32, 42 | fz_top1_ratio(`event_type_1`) over calendar_event_name_2_date_0s_64d_200 as calendar_event_type_1_window_top1_ratio_33 43 | from 44 | `calendar` 45 | window calendar_wday_date_0s_32d_100 as (partition by `wday` order by `date` rows_range between 32d open preceding and 0s preceding MAXSIZE 100), 46 | calendar_event_name_2_date_0s_64d_200 as (partition by `event_name_2` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 47 | calendar_event_name_2_date_0s_14d_200 as (partition by `event_name_2` order by `date` rows_range between 14d open preceding and 0s preceding MAXSIZE 200), 48 | calendar_event_name_1_date_0s_64d_100 as (partition by `event_name_1` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 49 | calendar_snap_CA_date_0s_64d_100 as (partition by `snap_CA` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 50 | calendar_snap_CA_date_0s_14d_100 as (partition by `snap_CA` order by `date` rows_range between 14d open preceding and 0s preceding MAXSIZE 100)) 51 | as out0 52 | last join 53 | ( 54 | select 55 | `calendar`.`d` as d_3, 56 | `sell_prices_wm_yr_wk`.`sell_price` as sell_prices_sell_price_multi_direct_2, 57 | `sell_prices_wm_yr_wk`.`item_id` as sell_prices_item_id_multi_direct_3, 58 | `sell_prices_wm_yr_wk`.`store_id` as 
sell_prices_store_id_multi_direct_4 59 | from 60 | `calendar` 61 | last join `sell_prices` as `sell_prices_wm_yr_wk` on `calendar`.`wm_yr_wk` = `sell_prices_wm_yr_wk`.`wm_yr_wk`) 62 | as out1 63 | on out0.d_1 = out1.d_3 64 | ; -------------------------------------------------------------------------------- /retails/material/tables/CreateIndexes.sql: -------------------------------------------------------------------------------- 1 | -- 创建索引 2 | 3 | 4 | CREATE INDEX idx_541487_primary ON table_9 (column_0); 5 | 6 | CREATE INDEX idx_541208_primary ON table_24 (column_1); 7 | 8 | CREATE INDEX idx_541579_primary ON table_42 (column_0); 9 | 10 | CREATE INDEX idx_541373_primary ON table_56 (column_8); 11 | 12 | CREATE INDEX idx_541426_primary ON table_39 (column_0); 13 | 14 | CREATE INDEX idx_541383_primary ON table_34 (column_0); 15 | 16 | CREATE INDEX idx_541312_primary ON table_26 (column_0); 17 | 18 | CREATE INDEX idx_541291_primary ON table_29 (column_0); 19 | 20 | CREATE INDEX idx_541340_primary ON table_74 (column_0); 21 | 22 | CREATE INDEX idx_541656_primary ON table_60 (column_0); 23 | 24 | CREATE INDEX idx_541650_primary ON table_8 (column_0); 25 | 26 | CREATE INDEX idx_541415_primary ON table_75 (column_0); 27 | 28 | CREATE INDEX idx_541662_primary ON table_46 (column_0); 29 | 30 | CREATE INDEX idx_541399_primary ON table_6 (column_0); 31 | 32 | CREATE INDEX idx_541572_primary ON table_71 (column_0); 33 | 34 | CREATE INDEX idx_541240_primary ON table_23 (column_0); 35 | 36 | CREATE INDEX idx_541448_primary ON table_12 (column_1); 37 | 38 | CREATE INDEX idx_541599_primary ON table_70 (column_0); 39 | 40 | CREATE INDEX idx_541309_primary ON table_48 (column_1, column_2); 41 | 42 | CREATE INDEX idx_541267_primary ON table_35 (column_0); 43 | 44 | CREATE INDEX idx_541475_primary ON table_16 (column_0); 45 | 46 | CREATE INDEX idx_541152_primary ON table_57 (column_0); 47 | 48 | CREATE INDEX idx_541668_primary ON table_76 (column_0); 49 | 50 | CREATE INDEX 
idx_541202_primary ON table_47 (column_0); 51 | 52 | CREATE INDEX idx_541535_primary ON table_77 (column_0); 53 | 54 | CREATE INDEX idx_541686_primary ON table_25 (column_0); 55 | 56 | CREATE INDEX idx_541280_primary ON table_3 (column_14); 57 | 58 | CREATE INDEX idx_541429_primary ON table_4 (column_0); 59 | 60 | CREATE INDEX idx_541259_primary ON table_1 (column_23); 61 | 62 | CREATE INDEX idx_541144_primary ON table_43 (column_0); 63 | 64 | CREATE INDEX idx_541644_primary ON table_31 (column_0); 65 | 66 | CREATE INDEX idx_541168_primary ON table_38 (column_0); 67 | 68 | CREATE INDEX idx_541613_primary ON table_19 (column_0); 69 | 70 | CREATE INDEX idx_541674_primary ON table_61 (column_0); 71 | 72 | CREATE INDEX idx_541249_primary ON table_32 (column_0); 73 | 74 | CREATE INDEX idx_541586_primary ON table_21 (column_0); 75 | 76 | CREATE INDEX idx_541607_primary ON table_49 (column_0); 77 | 78 | CREATE INDEX idx_541331_primary ON table_5 (column_0); 79 | 80 | CREATE INDEX idx_541441_primary ON table_28 (column_0); 81 | 82 | CREATE INDEX idx_541408_primary ON table_7 (column_0); 83 | 84 | CREATE INDEX idx_541562_primary ON table_73 (column_8); 85 | 86 | CREATE INDEX idx_541620_primary ON table_17 (column_0); 87 | 88 | CREATE INDEX idx_541320_primary ON table_59 (column_26); 89 | 90 | CREATE INDEX idx_541454_primary ON table_30 (column_0); 91 | 92 | CREATE INDEX idx_541361_primary ON table_44 (column_14); 93 | 94 | CREATE INDEX idx_541495_primary ON table_41 (column_0); 95 | 96 | CREATE INDEX idx_541539_primary ON table_52 (column_0); 97 | 98 | CREATE INDEX idx_541632_primary ON table_58 (column_0); 99 | 100 | CREATE INDEX idx_541692_primary ON table_78 (column_0); 101 | 102 | CREATE INDEX idx_541184_primary ON table_13 (column_0); 103 | 104 | CREATE INDEX idx_541626_primary ON table_55 (column_0); 105 | 106 | CREATE INDEX idx_541232_primary ON table_63 (column_0); 107 | 108 | CREATE INDEX idx_541390_primary ON table_66 (column_0); 109 | 110 | CREATE INDEX 
idx_541698_primary ON table_14 (column_0); 111 | 112 | CREATE INDEX idx_541175_primary ON table_45 (column_0); 113 | 114 | CREATE INDEX idx_541680_primary ON table_15 (column_0); 115 | 116 | CREATE INDEX idx_541509_primary ON table_36 (column_23); 117 | 118 | CREATE INDEX idx_541526_primary ON table_69 (column_0); 119 | 120 | CREATE INDEX idx_541705_primary ON table_51 (column_0); 121 | 122 | CREATE INDEX idx_541517_primary ON table_2 (column_0); 123 | 124 | CREATE INDEX idx_541592_primary ON table_65 (column_0); 125 | -------------------------------------------------------------------------------- /retails/orders/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_0.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_1.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_2.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_3.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_4.csv 6 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_5.csv 7 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_6.csv 8 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_7.csv 9 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_8.csv 10 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_9.csv 11 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_10.csv 12 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_11.csv 13 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_12.csv 14 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_13.csv 15 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_14.csv 16 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_15.csv 17 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_16.csv 18 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_17.csv 19 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_18.csv 20 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_19.csv 21 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_20.csv 22 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_21.csv 23 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_22.csv 24 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_23.csv 25 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_24.csv 26 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_25.csv 27 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_26.csv 28 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_27.csv 29 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_28.csv 30 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_29.csv 31 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_30.csv 32 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_31.csv 33 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_32.csv 34 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_33.csv 35 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/orders/table_34.csv -------------------------------------------------------------------------------- /retails/orders/tables/CreateIndexes.sql: -------------------------------------------------------------------------------- 1 | -- 创建索引 2 | 3 | 4 | CREATE INDEX idx_546431_primary ON table_12 ("column_0;"); 5 | 6 | CREATE INDEX idx_546588_primary ON table_5 ("column_0;"); 7 | 8 | CREATE INDEX idx_546506_primary ON table_13 ("column_12;"); 9 | 10 | CREATE INDEX idx_546402_primary ON table_20 ("column_0;"); 11 | 12 | CREATE INDEX idx_546476_primary ON table_4 
("column_0;"); 13 | 14 | CREATE INDEX idx_546408_primary ON table_7 ("column_0;"); 15 | 16 | CREATE INDEX idx_546421_primary ON table_19 ("column_5;"); 17 | 18 | CREATE INDEX idx_546450_primary ON table_22 ("column_3;"); 19 | 20 | CREATE INDEX idx_546462_primary ON table_6 ("column_1;"); 21 | 22 | CREATE INDEX idx_546581_primary ON table_33 ("column_11;"); 23 | 24 | CREATE INDEX idx_546555_primary ON table_30 ("column_5;"); 25 | 26 | CREATE INDEX idx_546388_primary ON table_34 ("column_1;"); 27 | 28 | CREATE INDEX idx_546561_primary ON table_18 ("column_6;"); 29 | 30 | CREATE INDEX idx_546567_primary ON table_15 ("column_6;"); 31 | 32 | CREATE INDEX idx_546542_primary ON table_9 ("column_10;"); 33 | 34 | CREATE INDEX idx_546591_primary ON table_28 ("column_2;"); 35 | 36 | CREATE INDEX idx_546394_primary ON table_14 ("column_2;"); 37 | 38 | CREATE INDEX idx_546573_primary ON table_11 ("column_0;"); 39 | 40 | CREATE INDEX idx_546604_primary ON table_17 ("column_0;"); 41 | 42 | CREATE INDEX idx_546513_primary ON table_32 ("column_1;"); 43 | 44 | CREATE INDEX idx_546486_primary ON table_27 ("column_5;"); 45 | 46 | CREATE INDEX idx_546549_primary ON table_16 ("column_28;"); 47 | -------------------------------------------------------------------------------- /retails/orders/tables/CreatePrimaryKeys.sql: -------------------------------------------------------------------------------- 1 | -- 添加主键的命令 2 | 3 | 4 | ALTER TABLE table_12 5 | ADD CONSTRAINT idx_546431_primary 6 | PRIMARY KEY ("column_0;"); 7 | 8 | 9 | ALTER TABLE table_5 10 | ADD CONSTRAINT idx_546588_primary 11 | PRIMARY KEY ("column_0;"); 12 | 13 | 14 | ALTER TABLE table_13 15 | ADD CONSTRAINT idx_546506_primary 16 | PRIMARY KEY ("column_12;"); 17 | 18 | 19 | ALTER TABLE table_20 20 | ADD CONSTRAINT idx_546402_primary 21 | PRIMARY KEY ("column_0;"); 22 | 23 | 24 | ALTER TABLE table_4 25 | ADD CONSTRAINT idx_546476_primary 26 | PRIMARY KEY ("column_0;"); 27 | 28 | 29 | ALTER TABLE table_7 30 | ADD CONSTRAINT 
idx_546408_primary 31 | PRIMARY KEY ("column_0;"); 32 | 33 | 34 | ALTER TABLE table_19 35 | ADD CONSTRAINT idx_546421_primary 36 | PRIMARY KEY ("column_5;"); 37 | 38 | 39 | ALTER TABLE table_22 40 | ADD CONSTRAINT idx_546450_primary 41 | PRIMARY KEY ("column_3;"); 42 | 43 | 44 | ALTER TABLE table_6 45 | ADD CONSTRAINT idx_546462_primary 46 | PRIMARY KEY ("column_1;"); 47 | 48 | 49 | ALTER TABLE table_33 50 | ADD CONSTRAINT idx_546581_primary 51 | PRIMARY KEY ("column_11;"); 52 | 53 | 54 | ALTER TABLE table_30 55 | ADD CONSTRAINT idx_546555_primary 56 | PRIMARY KEY ("column_5;"); 57 | 58 | 59 | ALTER TABLE table_34 60 | ADD CONSTRAINT idx_546388_primary 61 | PRIMARY KEY ("column_1;"); 62 | 63 | 64 | ALTER TABLE table_18 65 | ADD CONSTRAINT idx_546561_primary 66 | PRIMARY KEY ("column_6;"); 67 | 68 | 69 | ALTER TABLE table_15 70 | ADD CONSTRAINT idx_546567_primary 71 | PRIMARY KEY ("column_6;"); 72 | 73 | 74 | ALTER TABLE table_9 75 | ADD CONSTRAINT idx_546542_primary 76 | PRIMARY KEY ("column_10;"); 77 | 78 | 79 | ALTER TABLE table_28 80 | ADD CONSTRAINT idx_546591_primary 81 | PRIMARY KEY ("column_2;"); 82 | 83 | 84 | ALTER TABLE table_14 85 | ADD CONSTRAINT idx_546394_primary 86 | PRIMARY KEY ("column_2;"); 87 | 88 | 89 | ALTER TABLE table_11 90 | ADD CONSTRAINT idx_546573_primary 91 | PRIMARY KEY ("column_0;"); 92 | 93 | 94 | ALTER TABLE table_17 95 | ADD CONSTRAINT idx_546604_primary 96 | PRIMARY KEY ("column_0;"); 97 | 98 | 99 | ALTER TABLE table_32 100 | ADD CONSTRAINT idx_546513_primary 101 | PRIMARY KEY ("column_1;"); 102 | 103 | 104 | ALTER TABLE table_27 105 | ADD CONSTRAINT idx_546486_primary 106 | PRIMARY KEY ("column_5;"); 107 | 108 | 109 | ALTER TABLE table_16 110 | ADD CONSTRAINT idx_546549_primary 111 | PRIMARY KEY ("column_28;"); 112 | 113 | -------------------------------------------------------------------------------- /retails/rossmann-store-sales/data_link.txt: -------------------------------------------------------------------------------- 1 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/rossmann-store-sales/store.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/rossmann-store-sales/train.csv 3 | -------------------------------------------------------------------------------- /retails/rossmann-store-sales/queries/rossmann-store-sales_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `Id` as Id_1, 7 | `Date` as train_Date_original_0, 8 | `Id` as train_Id_original_1, 9 | `Sales` as train_Sales_original_2, 10 | `Customers` as train_Customers_original_12, 11 | `DayOfWeek` as train_DayOfWeek_original_13, 12 | `Open` as train_Open_original_14, 13 | `Promo` as train_Promo_original_15, 14 | `SchoolHoliday` as train_SchoolHoliday_original_16, 15 | `StateHoliday` as train_StateHoliday_original_17, 16 | `Store` as train_Store_original_18, 17 | `Open` as train_Open_combine_19, 18 | `Open` as train_Open_combine_20, 19 | `Open` as train_Open_combine_21, 20 | `Open` as train_Open_combine_22, 21 | case when !isnull(at(`Open`, 0)) over train_Customers_Date_0s_64d_100 then count_where(`Open`, `Open` = at(`Open`, 0)) over train_Customers_Date_0s_64d_100 else null end as train_Open_window_count_23, 22 | case when !isnull(at(`Open`, 0)) over train_Customers_Date_0s_14d_100 then count_where(`Open`, `Open` = at(`Open`, 0)) over train_Customers_Date_0s_14d_100 else null end as train_Open_window_count_24, 23 | case when !isnull(at(`StateHoliday`, 0)) over train_Customers_Date_0s_14d_100 then count_where(`StateHoliday`, `StateHoliday` = at(`StateHoliday`, 0)) over train_Customers_Date_0s_14d_100 else null end as train_StateHoliday_window_count_25, 24 | case when !isnull(at(`StateHoliday`, 0)) over train_Customers_Date_0s_64d_100 then count_where(`StateHoliday`, `StateHoliday` = at(`StateHoliday`, 0)) over train_Customers_Date_0s_64d_100 else null end as 
train_StateHoliday_window_count_26, 25 | case when !isnull(at(`Customers`, 0)) over train_DayOfWeek_Date_0s_64d_200 then count_where(`Customers`, `Customers` = at(`Customers`, 0)) over train_DayOfWeek_Date_0s_64d_200 else null end as train_Customers_window_count_27, 26 | case when !isnull(at(`Customers`, 0)) over train_Open_Date_0s_64d_200 then count_where(`Customers`, `Customers` = at(`Customers`, 0)) over train_Open_Date_0s_64d_200 else null end as train_Customers_window_count_28, 27 | fz_top1_ratio(`Customers`) over train_Open_Date_0s_64d_100 as train_Customers_window_top1_ratio_29, 28 | distinct_count(`Customers`) over train_Open_Date_0_10_ as train_Customers_window_unique_count_30, 29 | distinct_count(`Customers`) over train_Open_Date_0s_64d_100 as train_Customers_window_unique_count_31, 30 | case when !isnull(at(`Customers`, 0)) over train_Open_Date_0_10_ then count_where(`Customers`, `Customers` = at(`Customers`, 0)) over train_Open_Date_0_10_ else null end as train_Customers_window_count_32, 31 | fz_top1_ratio(`Store`) over train_Customers_Date_0s_14d_100 as train_Store_window_top1_ratio_33, 32 | case when !isnull(at(`Promo`, 0)) over train_Customers_Date_0s_64d_100 then count_where(`Promo`, `Promo` = at(`Promo`, 0)) over train_Customers_Date_0s_64d_100 else null end as train_Promo_window_count_34, 33 | case when !isnull(at(`Promo`, 0)) over train_Customers_Date_0s_14d_100 then count_where(`Promo`, `Promo` = at(`Promo`, 0)) over train_Customers_Date_0s_14d_100 else null end as train_Promo_window_count_35, 34 | fz_top1_ratio(`Customers`) over train_Open_Date_0_10_ as train_Customers_window_top1_ratio_36 35 | from 36 | `train` 37 | window train_Customers_Date_0s_64d_100 as (partition by `Customers` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 38 | train_Customers_Date_0s_14d_100 as (partition by `Customers` order by `Date` rows_range between 14d open preceding and 0s preceding MAXSIZE 100), 39 | 
train_DayOfWeek_Date_0s_64d_200 as (partition by `DayOfWeek` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 40 | train_Open_Date_0s_64d_200 as (partition by `Open` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 41 | train_Open_Date_0s_64d_100 as (partition by `Open` order by `Date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 42 | train_Open_Date_0_10_ as (partition by `Open` order by `Date` rows between 10 open preceding and 0 preceding)) 43 | as out0 44 | last join 45 | ( 46 | select 47 | `train`.`Id` as Id_4, 48 | `store_Store`.`Assortment` as store_Assortment_multi_direct_3, 49 | `store_Store`.`CompetitionDistance` as store_CompetitionDistance_multi_direct_4, 50 | `store_Store`.`CompetitionOpenSinceMonth` as store_CompetitionOpenSinceMonth_multi_direct_5, 51 | `store_Store`.`CompetitionOpenSinceYear` as store_CompetitionOpenSinceYear_multi_direct_6, 52 | `store_Store`.`Promo2` as store_Promo2_multi_direct_7, 53 | `store_Store`.`Promo2SinceWeek` as store_Promo2SinceWeek_multi_direct_8, 54 | `store_Store`.`Promo2SinceYear` as store_Promo2SinceYear_multi_direct_9, 55 | `store_Store`.`PromoInterval` as store_PromoInterval_multi_direct_10, 56 | `store_Store`.`StoreType` as store_StoreType_multi_direct_11 57 | from 58 | `train` 59 | last join `store` as `store_Store` on `train`.`Store` = `store_Store`.`Store`) 60 | as out1 61 | on out0.Id_1 = out1.Id_4 62 | ; -------------------------------------------------------------------------------- /retails/rossmann-store-sales/tables/rossmann-store-sales_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE store (Store INT, StoreType VARCHAR, Assortment VARCHAR, CompetitionDistance INT, CompetitionOpenSinceMonth INT, CompetitionOpenSinceYear INT, Promo2 INT, Promo2SinceWeek INT, Promo2SinceYear INT, PromoInterval VARCHAR); 2 | CREATE TABLE train (Store INT, DayOfWeek INT, 
Date TIMESTAMP, Sales INT, Customers INT, Open INT, Promo INT, StateHoliday VARCHAR, SchoolHoliday INT); 3 | -------------------------------------------------------------------------------- /retails/shopmall/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_0.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_1.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_2.csv 4 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_3.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_4.csv 6 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_5.csv 7 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_6.csv 8 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_7.csv 9 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_8.csv 10 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_9.csv 11 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_10.csv 12 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_11.csv 13 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_12.csv 14 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_13.csv 15 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_14.csv 16 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_15.csv 17 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_16.csv 18 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_17.csv 19 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_18.csv 20 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_19.csv 21 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_20.csv 22 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_21.csv 23 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_22.csv 24 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_23.csv 25 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_24.csv 26 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_25.csv 27 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_26.csv 28 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_27.csv 29 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_28.csv 30 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_29.csv 31 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_30.csv 32 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_31.csv 33 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_32.csv 34 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_33.csv 35 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/shopmall/table_34.csv 36 | -------------------------------------------------------------------------------- /retails/shopmall/tables/CreateIndexes.sql: -------------------------------------------------------------------------------- 1 | -- 创建索引 2 | 3 | 4 | CREATE INDEX idx_546506_primary ON table_12 ("column_12;"); 5 | 6 | CREATE INDEX idx_546486_primary ON table_15 ("column_5;"); 7 | 8 | CREATE INDEX idx_546408_primary ON table_23 ("column_0;"); 9 | 10 | CREATE INDEX idx_546581_primary ON table_21 ("column_11;"); 11 | 12 | CREATE INDEX idx_546561_primary ON table_2 ("column_6;"); 13 | 14 | CREATE INDEX idx_546388_primary ON table_17 ("column_1;"); 15 | 16 | CREATE INDEX idx_546402_primary ON table_32 ("column_0;"); 17 | 18 | CREATE INDEX idx_546394_primary ON table_9 ("column_2;"); 19 | 20 | CREATE INDEX idx_546555_primary ON table_7 ("column_5;"); 21 | 22 | CREATE INDEX idx_546542_primary ON table_34 
("column_10;"); 23 | 24 | CREATE INDEX idx_546588_primary ON table_4 ("column_0;"); 25 | 26 | CREATE INDEX idx_546513_primary ON table_3 ("column_1;"); 27 | 28 | CREATE INDEX idx_546462_primary ON table_22 ("column_1;"); 29 | 30 | CREATE INDEX idx_546549_primary ON table_8 ("column_28;"); 31 | 32 | CREATE INDEX idx_546450_primary ON table_16 ("column_3;"); 33 | 34 | CREATE INDEX idx_546421_primary ON table_10 ("column_5;"); 35 | 36 | CREATE INDEX idx_546431_primary ON table_18 ("column_0;"); 37 | 38 | CREATE INDEX idx_546604_primary ON table_30 ("column_0;"); 39 | 40 | CREATE INDEX idx_546567_primary ON table_31 ("column_6;"); 41 | 42 | CREATE INDEX idx_546591_primary ON table_27 ("column_2;"); 43 | 44 | CREATE INDEX idx_546573_primary ON table_28 ("column_0;"); 45 | 46 | CREATE INDEX idx_546476_primary ON table_25 ("column_0;"); 47 | -------------------------------------------------------------------------------- /retails/shopmall/tables/CreatePrimaryKeys.sql: -------------------------------------------------------------------------------- 1 | -- 添加主键的命令 2 | 3 | 4 | ALTER TABLE table_12 5 | ADD CONSTRAINT idx_546506_primary 6 | PRIMARY KEY ("column_12;"); 7 | 8 | 9 | ALTER TABLE table_15 10 | ADD CONSTRAINT idx_546486_primary 11 | PRIMARY KEY ("column_5;"); 12 | 13 | 14 | ALTER TABLE table_23 15 | ADD CONSTRAINT idx_546408_primary 16 | PRIMARY KEY ("column_0;"); 17 | 18 | 19 | ALTER TABLE table_21 20 | ADD CONSTRAINT idx_546581_primary 21 | PRIMARY KEY ("column_11;"); 22 | 23 | 24 | ALTER TABLE table_2 25 | ADD CONSTRAINT idx_546561_primary 26 | PRIMARY KEY ("column_6;"); 27 | 28 | 29 | ALTER TABLE table_17 30 | ADD CONSTRAINT idx_546388_primary 31 | PRIMARY KEY ("column_1;"); 32 | 33 | 34 | ALTER TABLE table_32 35 | ADD CONSTRAINT idx_546402_primary 36 | PRIMARY KEY ("column_0;"); 37 | 38 | 39 | ALTER TABLE table_9 40 | ADD CONSTRAINT idx_546394_primary 41 | PRIMARY KEY ("column_2;"); 42 | 43 | 44 | ALTER TABLE table_7 45 | ADD CONSTRAINT idx_546555_primary 46 | 
PRIMARY KEY ("column_5;"); 47 | 48 | 49 | ALTER TABLE table_34 50 | ADD CONSTRAINT idx_546542_primary 51 | PRIMARY KEY ("column_10;"); 52 | 53 | 54 | ALTER TABLE table_4 55 | ADD CONSTRAINT idx_546588_primary 56 | PRIMARY KEY ("column_0;"); 57 | 58 | 59 | ALTER TABLE table_3 60 | ADD CONSTRAINT idx_546513_primary 61 | PRIMARY KEY ("column_1;"); 62 | 63 | 64 | ALTER TABLE table_22 65 | ADD CONSTRAINT idx_546462_primary 66 | PRIMARY KEY ("column_1;"); 67 | 68 | 69 | ALTER TABLE table_8 70 | ADD CONSTRAINT idx_546549_primary 71 | PRIMARY KEY ("column_28;"); 72 | 73 | 74 | ALTER TABLE table_16 75 | ADD CONSTRAINT idx_546450_primary 76 | PRIMARY KEY ("column_3;"); 77 | 78 | 79 | ALTER TABLE table_10 80 | ADD CONSTRAINT idx_546421_primary 81 | PRIMARY KEY ("column_5;"); 82 | 83 | 84 | ALTER TABLE table_18 85 | ADD CONSTRAINT idx_546431_primary 86 | PRIMARY KEY ("column_0;"); 87 | 88 | 89 | ALTER TABLE table_30 90 | ADD CONSTRAINT idx_546604_primary 91 | PRIMARY KEY ("column_0;"); 92 | 93 | 94 | ALTER TABLE table_31 95 | ADD CONSTRAINT idx_546567_primary 96 | PRIMARY KEY ("column_6;"); 97 | 98 | 99 | ALTER TABLE table_27 100 | ADD CONSTRAINT idx_546591_primary 101 | PRIMARY KEY ("column_2;"); 102 | 103 | 104 | ALTER TABLE table_28 105 | ADD CONSTRAINT idx_546573_primary 106 | PRIMARY KEY ("column_0;"); 107 | 108 | 109 | ALTER TABLE table_25 110 | ADD CONSTRAINT idx_546476_primary 111 | PRIMARY KEY ("column_0;"); 112 | 113 | -------------------------------------------------------------------------------- /retails/store-sales-time-series-forecasting/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/store-sales-time-series-forecasting/holidays_events.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/store-sales-time-series-forecasting/main.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/store-sales-time-series-forecasting/oil.csv 4 | 
https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/store-sales-time-series-forecasting/stores.csv 5 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/store-sales-time-series-forecasting/transactions.csv 6 | -------------------------------------------------------------------------------- /retails/store-sales-time-series-forecasting/queries/store-sales-time-series-forecasting_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `store_nbr` as store_nbr_1, 7 | `date` as main_date_original_0, 8 | `store_nbr` as main_store_nbr_original_1, 9 | `sales` as main_sales_original_2, 10 | `family` as main_family_original_9, 11 | `onpromotion` as main_onpromotion_original_10, 12 | `family` as main_family_combine_11, 13 | `store_nbr` as main_store_nbr_combine_11, 14 | `family` as main_family_combine_12, 15 | `family` as main_family_combine_13, 16 | `family` as main_family_combine_14, 17 | dayofweek(timestamp(`date`)) as main_date_dayofweek_15, 18 | case when 1 < dayofweek(timestamp(`date`)) and dayofweek(timestamp(`date`)) < 7 then 1 else 0 end as main_date_isweekday_16, 19 | fz_top1_ratio(`onpromotion`) over main_store_nbr_date_0s_2d_200 as main_onpromotion_window_top1_ratio_17, 20 | distinct_count(`onpromotion`) over main_store_nbr_date_0s_64d_100 as main_onpromotion_window_unique_count_18, 21 | distinct_count(`onpromotion`) over main_store_nbr_date_0s_2d_200 as main_onpromotion_window_unique_count_19, 22 | distinct_count(`store_nbr`) over main_onpromotion_date_0_10_ as main_store_nbr_window_unique_count_20, 23 | fz_top1_ratio(`store_nbr`) over main_onpromotion_date_0_10_ as main_store_nbr_window_top1_ratio_21, 24 | distinct_count(`family`) over main_onpromotion_date_0s_64d_200 as main_family_window_unique_count_22, 25 | distinct_count(`family`) over main_onpromotion_date_0s_14d_200 as main_family_window_unique_count_23, 26 | 
distinct_count(`store_nbr`) over main_onpromotion_date_0s_64d_100 as main_store_nbr_window_unique_count_24, 27 | fz_top1_ratio(`store_nbr`) over main_onpromotion_date_0s_64d_200 as main_store_nbr_window_top1_ratio_25, 28 | fz_top1_ratio(`family`) over main_onpromotion_date_0s_7d_200 as main_family_window_top1_ratio_26, 29 | fz_top1_ratio(`family`) over main_onpromotion_date_0s_64d_200 as main_family_window_top1_ratio_27, 30 | case when !isnull(at(`onpromotion`, 0)) over main_store_nbr_date_0s_7d_200 then count_where(`onpromotion`, `onpromotion` = at(`onpromotion`, 0)) over main_store_nbr_date_0s_7d_200 else null end as main_onpromotion_window_count_28 31 | from 32 | `main` 33 | window main_store_nbr_date_0s_2d_200 as (partition by `store_nbr` order by `date` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 34 | main_store_nbr_date_0s_64d_100 as (partition by `store_nbr` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 35 | main_onpromotion_date_0_10_ as (partition by `onpromotion` order by `date` rows between 10 open preceding and 0 preceding), 36 | main_onpromotion_date_0s_64d_200 as (partition by `onpromotion` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 37 | main_onpromotion_date_0s_14d_200 as (partition by `onpromotion` order by `date` rows_range between 14d open preceding and 0s preceding MAXSIZE 200), 38 | main_onpromotion_date_0s_64d_100 as (partition by `onpromotion` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 39 | main_onpromotion_date_0s_7d_200 as (partition by `onpromotion` order by `date` rows_range between 7d open preceding and 0s preceding MAXSIZE 200), 40 | main_store_nbr_date_0s_7d_200 as (partition by `store_nbr` order by `date` rows_range between 7d open preceding and 0s preceding MAXSIZE 200)) 41 | as out0 42 | last join 43 | ( 44 | select 45 | `main`.`store_nbr` as store_nbr_4, 46 | `stores_store_nbr`.`city` as 
stores_city_multi_direct_3, 47 | `stores_store_nbr`.`cluster` as stores_cluster_multi_direct_4, 48 | `stores_store_nbr`.`state` as stores_state_multi_direct_5, 49 | `stores_store_nbr`.`type` as stores_type_multi_direct_6 50 | from 51 | `main` 52 | last join `stores` as `stores_store_nbr` on `main`.`store_nbr` = `stores_store_nbr`.`store_nbr`) 53 | as out1 54 | on out0.store_nbr_1 = out1.store_nbr_4 55 | last join 56 | ( 57 | select 58 | `store_nbr` as store_nbr_8, 59 | fz_topn_frequency(`transactions`, 3) over transactions_store_nbr_date_0s_64d_100 as transactions_transactions_multi_top3frequency_7, 60 | distinct_count(`transactions`) over transactions_store_nbr_date_0s_64d_100 as transactions_transactions_multi_unique_count_8 61 | from 62 | (select `date` as `date`, `store_nbr` as `store_nbr`, int(0) as `transactions` from `main`) 63 | window transactions_store_nbr_date_0s_64d_100 as ( 64 | UNION `transactions` partition by `store_nbr` order by `date` rows_range between 64d open preceding and 0s preceding MAXSIZE 100 INSTANCE_NOT_IN_WINDOW)) 65 | as out2 66 | on out0.store_nbr_1 = out2.store_nbr_8 67 | ; -------------------------------------------------------------------------------- /retails/store-sales-time-series-forecasting/tables/store-sales-time-series-forecasting_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE holidays_events (date TIMESTAMP, type VARCHAR, locale VARCHAR, locale_name VARCHAR, description VARCHAR, transferred VARCHAR); 2 | CREATE TABLE main (id INT, date TIMESTAMP, store_nbr INT, family VARCHAR, sales DOUBLE PRECISION, onpromotion INT); 3 | CREATE TABLE oil (date TIMESTAMP, dcoilwtico DOUBLE PRECISION); 4 | CREATE TABLE stores (store_nbr INT, city VARCHAR, state VARCHAR, type VARCHAR, cluster INT); 5 | CREATE TABLE transactions (date TIMESTAMP, store_nbr INT, transactions INT); 6 | -------------------------------------------------------------------------------- 
/retails/transaction/data_link.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaDatabaseGroup/datasets/685fabfcb5eaee9b2f3a889206b0b0a0f81f22e0/retails/transaction/data_link.txt -------------------------------------------------------------------------------- /retails/transaction/tables/CreateIndexes.sql: -------------------------------------------------------------------------------- 1 | -- 创建索引 2 | 3 | 4 | CREATE INDEX idx_547293_primary ON table_25 (column_0); 5 | 6 | CREATE INDEX idx_547084_primary ON table_27 (column_0); 7 | 8 | CREATE INDEX idx_547126_primary ON table_22 (column_3); 9 | 10 | CREATE INDEX idx_547165_primary ON table_23 (column_0); 11 | 12 | CREATE INDEX idx_547339_primary ON table_29 (column_0); 13 | 14 | CREATE INDEX idx_547279_primary ON table_15 (column_1); 15 | 16 | CREATE INDEX idx_547309_primary ON table_35 (column_0); 17 | 18 | CREATE INDEX idx_547299_primary ON table_16 (column_0); 19 | 20 | CREATE INDEX idx_547240_primary ON table_33 (column_0); 21 | 22 | CREATE INDEX idx_547096_primary ON table_41 (column_0); 23 | 24 | CREATE INDEX idx_547209_primary ON table_1 (column_0); 25 | 26 | CREATE INDEX idx_547156_primary ON table_0 (column_0); 27 | 28 | CREATE INDEX idx_547181_primary ON table_42 (column_0); 29 | 30 | CREATE INDEX idx_547187_primary ON table_9 (column_0); 31 | 32 | CREATE INDEX idx_547270_primary ON table_2 (column_0); 33 | 34 | CREATE INDEX idx_547172_primary ON table_43 (column_0); 35 | 36 | CREATE INDEX idx_547197_primary ON table_30 (column_0); 37 | 38 | CREATE INDEX idx_547144_primary ON table_46 (column_2); 39 | 40 | CREATE INDEX idx_547333_primary ON table_10 (column_0); 41 | 42 | CREATE INDEX idx_547190_primary ON table_26 (column_0); 43 | 44 | CREATE INDEX idx_547246_primary ON table_6 (column_0); 45 | 46 | CREATE INDEX idx_547206_primary ON table_31 (column_0); 47 | 48 | CREATE INDEX idx_547175_primary ON table_21 (column_0); 49 | 50 | 
CREATE INDEX idx_547200_primary ON table_38 (column_0); 51 | 52 | CREATE INDEX idx_547321_primary ON table_34 (column_0); 53 | 54 | CREATE INDEX idx_547234_primary ON table_3 (column_0); 55 | 56 | CREATE INDEX idx_547276_primary ON table_49 (column_1); 57 | 58 | CREATE INDEX idx_547162_primary ON table_37 (column_0); 59 | 60 | CREATE INDEX idx_547108_primary ON table_40 (column_0); 61 | 62 | CREATE INDEX idx_547114_primary ON table_20 (column_1); 63 | 64 | CREATE INDEX idx_547102_primary ON table_13 (column_0); 65 | 66 | CREATE INDEX idx_547327_primary ON table_48 (column_0); 67 | 68 | CREATE INDEX idx_547227_primary ON table_18 (column_0); 69 | 70 | CREATE INDEX idx_547258_primary ON table_5 (column_0); 71 | 72 | CREATE INDEX idx_547252_primary ON table_44 (column_0); 73 | 74 | CREATE INDEX idx_547286_primary ON table_19 (column_0); 75 | 76 | CREATE INDEX idx_547351_primary ON table_28 (column_0); 77 | 78 | CREATE INDEX idx_547264_primary ON table_36 (column_0); 79 | 80 | CREATE INDEX idx_547315_primary ON table_17 (column_0); 81 | 82 | CREATE INDEX idx_547305_primary ON table_45 (column_1); 83 | 84 | CREATE INDEX idx_547215_primary ON table_32 (column_0); 85 | 86 | CREATE INDEX idx_547358_primary ON table_14 (column_0); 87 | 88 | CREATE INDEX idx_547220_primary ON table_8 (column_0); 89 | 90 | CREATE INDEX idx_547078_primary ON table_4 (column_0); 91 | 92 | CREATE INDEX idx_547345_primary ON table_7 (column_0); 93 | 94 | CREATE INDEX idx_547090_primary ON table_24 (column_0); 95 | 96 | CREATE INDEX idx_547140_primary ON table_11 (column_0); 97 | -------------------------------------------------------------------------------- /retails/transaction/tables/CreatePrimaryKeys.sql: -------------------------------------------------------------------------------- 1 | -- 添加主键的命令 2 | 3 | 4 | ALTER TABLE table_25 5 | ADD CONSTRAINT idx_547293_primary 6 | PRIMARY KEY (column_0); 7 | 8 | 9 | ALTER TABLE table_27 10 | ADD CONSTRAINT idx_547084_primary 11 | PRIMARY KEY 
(column_0); 12 | 13 | 14 | ALTER TABLE table_22 15 | ADD CONSTRAINT idx_547126_primary 16 | PRIMARY KEY (column_3); 17 | 18 | 19 | ALTER TABLE table_23 20 | ADD CONSTRAINT idx_547165_primary 21 | PRIMARY KEY (column_0); 22 | 23 | 24 | ALTER TABLE table_29 25 | ADD CONSTRAINT idx_547339_primary 26 | PRIMARY KEY (column_0); 27 | 28 | 29 | ALTER TABLE table_15 30 | ADD CONSTRAINT idx_547279_primary 31 | PRIMARY KEY (column_1); 32 | 33 | 34 | ALTER TABLE table_35 35 | ADD CONSTRAINT idx_547309_primary 36 | PRIMARY KEY (column_0); 37 | 38 | 39 | ALTER TABLE table_16 40 | ADD CONSTRAINT idx_547299_primary 41 | PRIMARY KEY (column_0); 42 | 43 | 44 | ALTER TABLE table_33 45 | ADD CONSTRAINT idx_547240_primary 46 | PRIMARY KEY (column_0); 47 | 48 | 49 | ALTER TABLE table_41 50 | ADD CONSTRAINT idx_547096_primary 51 | PRIMARY KEY (column_0); 52 | 53 | 54 | ALTER TABLE table_1 55 | ADD CONSTRAINT idx_547209_primary 56 | PRIMARY KEY (column_0); 57 | 58 | 59 | ALTER TABLE table_0 60 | ADD CONSTRAINT idx_547156_primary 61 | PRIMARY KEY (column_0); 62 | 63 | 64 | ALTER TABLE table_42 65 | ADD CONSTRAINT idx_547181_primary 66 | PRIMARY KEY (column_0); 67 | 68 | 69 | ALTER TABLE table_9 70 | ADD CONSTRAINT idx_547187_primary 71 | PRIMARY KEY (column_0); 72 | 73 | 74 | ALTER TABLE table_2 75 | ADD CONSTRAINT idx_547270_primary 76 | PRIMARY KEY (column_0); 77 | 78 | 79 | ALTER TABLE table_43 80 | ADD CONSTRAINT idx_547172_primary 81 | PRIMARY KEY (column_0); 82 | 83 | 84 | ALTER TABLE table_30 85 | ADD CONSTRAINT idx_547197_primary 86 | PRIMARY KEY (column_0); 87 | 88 | 89 | ALTER TABLE table_46 90 | ADD CONSTRAINT idx_547144_primary 91 | PRIMARY KEY (column_2); 92 | 93 | 94 | ALTER TABLE table_10 95 | ADD CONSTRAINT idx_547333_primary 96 | PRIMARY KEY (column_0); 97 | 98 | 99 | ALTER TABLE table_26 100 | ADD CONSTRAINT idx_547190_primary 101 | PRIMARY KEY (column_0); 102 | 103 | 104 | ALTER TABLE table_6 105 | ADD CONSTRAINT idx_547246_primary 106 | PRIMARY KEY (column_0); 107 | 108 
| 109 | ALTER TABLE table_31 110 | ADD CONSTRAINT idx_547206_primary 111 | PRIMARY KEY (column_0); 112 | 113 | 114 | ALTER TABLE table_21 115 | ADD CONSTRAINT idx_547175_primary 116 | PRIMARY KEY (column_0); 117 | 118 | 119 | ALTER TABLE table_38 120 | ADD CONSTRAINT idx_547200_primary 121 | PRIMARY KEY (column_0); 122 | 123 | 124 | ALTER TABLE table_34 125 | ADD CONSTRAINT idx_547321_primary 126 | PRIMARY KEY (column_0); 127 | 128 | 129 | ALTER TABLE table_3 130 | ADD CONSTRAINT idx_547234_primary 131 | PRIMARY KEY (column_0); 132 | 133 | 134 | ALTER TABLE table_49 135 | ADD CONSTRAINT idx_547276_primary 136 | PRIMARY KEY (column_1); 137 | 138 | 139 | ALTER TABLE table_37 140 | ADD CONSTRAINT idx_547162_primary 141 | PRIMARY KEY (column_0); 142 | 143 | 144 | ALTER TABLE table_40 145 | ADD CONSTRAINT idx_547108_primary 146 | PRIMARY KEY (column_0); 147 | 148 | 149 | ALTER TABLE table_20 150 | ADD CONSTRAINT idx_547114_primary 151 | PRIMARY KEY (column_1); 152 | 153 | 154 | ALTER TABLE table_13 155 | ADD CONSTRAINT idx_547102_primary 156 | PRIMARY KEY (column_0); 157 | 158 | 159 | ALTER TABLE table_48 160 | ADD CONSTRAINT idx_547327_primary 161 | PRIMARY KEY (column_0); 162 | 163 | 164 | ALTER TABLE table_18 165 | ADD CONSTRAINT idx_547227_primary 166 | PRIMARY KEY (column_0); 167 | 168 | 169 | ALTER TABLE table_5 170 | ADD CONSTRAINT idx_547258_primary 171 | PRIMARY KEY (column_0); 172 | 173 | 174 | ALTER TABLE table_44 175 | ADD CONSTRAINT idx_547252_primary 176 | PRIMARY KEY (column_0); 177 | 178 | 179 | ALTER TABLE table_19 180 | ADD CONSTRAINT idx_547286_primary 181 | PRIMARY KEY (column_0); 182 | 183 | 184 | ALTER TABLE table_28 185 | ADD CONSTRAINT idx_547351_primary 186 | PRIMARY KEY (column_0); 187 | 188 | 189 | ALTER TABLE table_36 190 | ADD CONSTRAINT idx_547264_primary 191 | PRIMARY KEY (column_0); 192 | 193 | 194 | ALTER TABLE table_17 195 | ADD CONSTRAINT idx_547315_primary 196 | PRIMARY KEY (column_0); 197 | 198 | 199 | ALTER TABLE table_45 200 | ADD 
CONSTRAINT idx_547305_primary 201 | PRIMARY KEY (column_1); 202 | 203 | 204 | ALTER TABLE table_32 205 | ADD CONSTRAINT idx_547215_primary 206 | PRIMARY KEY (column_0); 207 | 208 | 209 | ALTER TABLE table_14 210 | ADD CONSTRAINT idx_547358_primary 211 | PRIMARY KEY (column_0); 212 | 213 | 214 | ALTER TABLE table_8 215 | ADD CONSTRAINT idx_547220_primary 216 | PRIMARY KEY (column_0); 217 | 218 | 219 | ALTER TABLE table_4 220 | ADD CONSTRAINT idx_547078_primary 221 | PRIMARY KEY (column_0); 222 | 223 | 224 | ALTER TABLE table_7 225 | ADD CONSTRAINT idx_547345_primary 226 | PRIMARY KEY (column_0); 227 | 228 | 229 | ALTER TABLE table_24 230 | ADD CONSTRAINT idx_547090_primary 231 | PRIMARY KEY (column_0); 232 | 233 | 234 | ALTER TABLE table_11 235 | ADD CONSTRAINT idx_547140_primary 236 | PRIMARY KEY (column_0); 237 | 238 | -------------------------------------------------------------------------------- /retails/walmart-recruiting-sales-in-stormy-weather/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/walmart-recruiting-sales-in-stormy-weather/key.csv 2 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/walmart-recruiting-sales-in-stormy-weather/train.csv 3 | https://dbgroup.cs.tsinghua.edu.cn/datasets/retails/walmart-recruiting-sales-in-stormy-weather/weather.csv 4 | -------------------------------------------------------------------------------- /retails/walmart-recruiting-sales-in-stormy-weather/queries/walmart-recruiting-sales-in-stormy-weather_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | select * from 4 | ( 5 | select 6 | `date` as date_1, 7 | `date` as train_date_original_0, 8 | `units` as train_units_original_2, 9 | `item_nbr` as train_item_nbr_original_19, 10 | `store_nbr` as train_store_nbr_original_20, 11 | `item_nbr` as train_item_nbr_combine_21, 
12 | `store_nbr` as train_store_nbr_combine_21, 13 | `item_nbr` as train_item_nbr_combine_22, 14 | `store_nbr` as train_store_nbr_combine_22, 15 | `item_nbr` as train_item_nbr_combine_23, 16 | `store_nbr` as train_store_nbr_combine_23, 17 | `item_nbr` as train_item_nbr_combine_24, 18 | `store_nbr` as train_store_nbr_combine_24 19 | from 20 | `train` 21 | ) 22 | as out0 23 | last join 24 | ( 25 | select 26 | `train`.`date` as date_3, 27 | `key_store_nbr`.`station_nbr` as key_station_nbr_multi_direct_3, 28 | `weather_date`.`avgspeed` as weather_avgspeed_multi_direct_4, 29 | `weather_date`.`codesum` as weather_codesum_multi_direct_5, 30 | `weather_date`.`cool` as weather_cool_multi_direct_6, 31 | `weather_date`.`depart` as weather_depart_multi_direct_7, 32 | `weather_date`.`dewpoint` as weather_dewpoint_multi_direct_8, 33 | `weather_date`.`heat` as weather_heat_multi_direct_9, 34 | `weather_date`.`preciptotal` as weather_preciptotal_multi_direct_10, 35 | `weather_date`.`resultdir` as weather_resultdir_multi_direct_11, 36 | `weather_date`.`resultspeed` as weather_resultspeed_multi_direct_12, 37 | `weather_date`.`sealevel` as weather_sealevel_multi_direct_13, 38 | `weather_date`.`stnpressure` as weather_stnpressure_multi_direct_14, 39 | `weather_date`.`tavg` as weather_tavg_multi_direct_15, 40 | `weather_date`.`tmax` as weather_tmax_multi_direct_16, 41 | `weather_date`.`tmin` as weather_tmin_multi_direct_17, 42 | `weather_date`.`wetbulb` as weather_wetbulb_multi_direct_18 43 | from 44 | `train` 45 | last join `key` as `key_store_nbr` on `train`.`store_nbr` = `key_store_nbr`.`store_nbr` 46 | last join `weather` as `weather_date` on `train`.`date` = `weather_date`.`date`) 47 | as out1 48 | on out0.date_1 = out1.date_3 49 | ; -------------------------------------------------------------------------------- /retails/walmart-recruiting-sales-in-stormy-weather/tables/walmart-recruiting-sales-in-stormy-weather_schema.sql: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE key 2 | ( 3 | store_nbr INT, 4 | station_nbr INT 5 | ); 6 | CREATE TABLE train 7 | ( 8 | date TIMESTAMP, 9 | store_nbr INT, 10 | item_nbr INT, 11 | units INT 12 | ); 13 | CREATE TABLE weather 14 | ( 15 | station_nbr INT, 16 | date TIMESTAMP, 17 | tmax VARCHAR, 18 | tmin VARCHAR, 19 | tavg VARCHAR, 20 | depart VARCHAR, 21 | dewpoint VARCHAR, 22 | wetbulb VARCHAR, 23 | heat VARCHAR, 24 | cool VARCHAR, 25 | sunrise VARCHAR, 26 | sunset VARCHAR, 27 | codesum VARCHAR, 28 | snowfall VARCHAR, 29 | preciptotal VARCHAR, 30 | stnpressure VARCHAR, 31 | sealevel VARCHAR, 32 | resultspeed VARCHAR, 33 | resultdir VARCHAR, 34 | avgspeed VARCHAR 35 | ); 36 | -------------------------------------------------------------------------------- /transport/nyc-taxi-trip-duration/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/transport/nyc-taxi-trip-duration/train.csv 2 | -------------------------------------------------------------------------------- /transport/nyc-taxi-trip-duration/queries/nyc-taxi-trip-duration_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | select 5 | `id` as id_1, 6 | `pickup_datetime` as train_pickup_datetime_original_0, 7 | `id` as train_id_original_1, 8 | `trip_duration` as train_trip_duration_original_2, 9 | `dropoff_datetime` as train_dropoff_datetime_original_3, 10 | `dropoff_latitude` as train_dropoff_latitude_original_4, 11 | `dropoff_longitude` as train_dropoff_longitude_original_5, 12 | `passenger_count` as train_passenger_count_original_6, 13 | `pickup_latitude` as train_pickup_latitude_original_7, 14 | `pickup_longitude` as train_pickup_longitude_original_8, 15 | `store_and_fwd_flag` as train_store_and_fwd_flag_original_9, 16 | `vendor_id` as 
train_vendor_id_original_10, 17 | `dropoff_longitude` as train_dropoff_longitude_divide_11, 18 | `pickup_longitude` as train_pickup_longitude_divide_11, 19 | `pickup_longitude` as train_pickup_longitude_divide_12, 20 | `dropoff_longitude` as train_dropoff_longitude_divide_12, 21 | `pickup_latitude` as train_pickup_latitude_divide_13, 22 | `dropoff_latitude` as train_dropoff_latitude_divide_13, 23 | `dropoff_latitude` as train_dropoff_latitude_divide_14, 24 | `pickup_latitude` as train_pickup_latitude_divide_14, 25 | `pickup_longitude` as train_pickup_longitude_multiply_15, 26 | `dropoff_latitude` as train_dropoff_latitude_multiply_15, 27 | `dropoff_longitude` as train_dropoff_longitude_multiply_15, 28 | `pickup_longitude` as train_pickup_longitude_multiply_16, 29 | `dropoff_latitude` as train_dropoff_latitude_multiply_16, 30 | `pickup_longitude` as train_pickup_longitude_multiply_17, 31 | `pickup_latitude` as train_pickup_latitude_multiply_17, 32 | `dropoff_longitude` as train_dropoff_longitude_multiply_17, 33 | `pickup_longitude` as train_pickup_longitude_multiply_18, 34 | `dropoff_longitude` as train_dropoff_longitude_multiply_18, 35 | `pickup_latitude` as train_pickup_latitude_multiply_19, 36 | `dropoff_longitude` as train_dropoff_longitude_multiply_19, 37 | hour(timestamp(`dropoff_datetime`)) as train_dropoff_datetime_hourofday_20, 38 | hour(timestamp(`pickup_datetime`)) as train_pickup_datetime_hourofday_21, 39 | case when !isnull(at(`vendor_id`, 0)) over train_store_and_fwd_flag_pickup_datetime_0s_1h_-1 then count_where(`vendor_id`, `vendor_id` = at(`vendor_id`, 0)) over train_store_and_fwd_flag_pickup_datetime_0s_1h_-1 else null end as train_vendor_id_window_count_22, 40 | sum(`pickup_latitude`) over train_vendor_id_pickup_datetime_0s_1h_-1 as train_pickup_latitude_window_sum_23, 41 | sum(`pickup_latitude`) over train_store_and_fwd_flag_pickup_datetime_0s_1h_-2 as train_pickup_latitude_window_sum_24, 42 | case when !isnull(at(`vendor_id`, 0)) over 
train_store_and_fwd_flag_pickup_datetime_0s_2h_-1 then count_where(`vendor_id`, `vendor_id` = at(`vendor_id`, 0)) over train_store_and_fwd_flag_pickup_datetime_0s_2h_-1 else null end as train_vendor_id_window_count_25, 43 | avg(`pickup_latitude`) over train_store_and_fwd_flag_pickup_datetime_0s_5h_-1 as train_pickup_latitude_window_avg_26, 44 | sum(`pickup_latitude`) over train_vendor_id_pickup_datetime_0s_2h_-2 as train_pickup_latitude_window_sum_27, 45 | sum(`dropoff_latitude`) over train_store_and_fwd_flag_pickup_datetime_0s_1h_-2 as train_dropoff_latitude_window_sum_28 46 | from 47 | `train` 48 | window train_store_and_fwd_flag_pickup_datetime_0s_1h_-1 as (partition by `store_and_fwd_flag` order by `pickup_datetime` rows_range between 1h open preceding and 0s preceding), 49 | train_vendor_id_pickup_datetime_0s_1h_-1 as (partition by `vendor_id` order by `pickup_datetime` rows_range between 1h open preceding and 0s preceding), 50 | train_store_and_fwd_flag_pickup_datetime_0s_1h_-2 as (partition by `store_and_fwd_flag` order by `pickup_datetime` rows_range between 1h open preceding and 0s preceding), 51 | train_store_and_fwd_flag_pickup_datetime_0s_2h_-1 as (partition by `store_and_fwd_flag` order by `pickup_datetime` rows_range between 2h open preceding and 0s preceding), 52 | train_store_and_fwd_flag_pickup_datetime_0s_5h_-1 as (partition by `store_and_fwd_flag` order by `pickup_datetime` rows_range between 5h open preceding and 0s preceding), 53 | train_vendor_id_pickup_datetime_0s_2h_-2 as (partition by `vendor_id` order by `pickup_datetime` rows_range between 2h open preceding and 0s preceding); -------------------------------------------------------------------------------- /transport/nyc-taxi-trip-duration/tables/nyc-taxi-trip-duration_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE train (id VARCHAR, vendor_id INT, pickup_datetime TIMESTAMP, dropoff_datetime TIMESTAMP, passenger_count INT, 
/* nyc-taxi-trip-duration train schema (continued): pickup/dropoff coordinates are DOUBLE PRECISION */ pickup_longitude DOUBLE PRECISION, pickup_latitude DOUBLE PRECISION, dropoff_longitude DOUBLE PRECISION, dropoff_latitude DOUBLE PRECISION, store_and_fwd_flag VARCHAR, trip_duration INT); 2 | -------------------------------------------------------------------------------- /transport/taxi-trajectory/data_link.txt: -------------------------------------------------------------------------------- 1 | https://dbgroup.cs.tsinghua.edu.cn/datasets/transport/taxi-trajectory/train.csv 2 | -------------------------------------------------------------------------------- /transport/taxi-trajectory/queries/taxi-trajectory_query.sql: -------------------------------------------------------------------------------- 1 | # start sql code 2 | # output table name: sql_table 3 | 4 | /* feature-engineering query over taxi-trajectory train: emits pass-through (original_*), key-combination (combine_*), and time-windowed aggregate columns */ select 5 | `TRIP_ID` as TRIP_ID_1, 6 | `TIMESTAMP` as train_TIMESTAMP_original_0, 7 | `TRIP_ID` as train_TRIP_ID_original_1, 8 | `ORIGIN_STAND` as train_ORIGIN_STAND_original_2, 9 | `CALL_TYPE` as train_CALL_TYPE_original_3, 10 | `ORIGIN_CALL` as train_ORIGIN_CALL_original_4, 11 | `POLYLINE` as train_POLYLINE_original_5, 12 | `TAXI_ID` as train_TAXI_ID_original_6, 13 | `CALL_TYPE` as train_CALL_TYPE_combine_7, 14 | `ORIGIN_CALL` as train_ORIGIN_CALL_combine_7, 15 | `TAXI_ID` as train_TAXI_ID_combine_7, 16 | `CALL_TYPE` as train_CALL_TYPE_combine_8, 17 | `TAXI_ID` as train_TAXI_ID_combine_8, 18 | `ORIGIN_CALL` as train_ORIGIN_CALL_combine_9, 19 | `TAXI_ID` as train_TAXI_ID_combine_9, 20 | `CALL_TYPE` as train_CALL_TYPE_combine_10, 21 | `ORIGIN_CALL` as train_ORIGIN_CALL_combine_10, 22 | /* windowed features: window names encode partition key, order key, frame bounds, and MAXSIZE row cap; definitions live in the trailing WINDOW clause */ distinct_count(`POLYLINE`) over train_TAXI_ID_TIMESTAMP_0s_1h_200 as train_POLYLINE_window_unique_count_11, 23 | fz_top1_ratio(`TRIP_ID`) over train_TAXI_ID_TIMESTAMP_0s_1h_200 as train_TRIP_ID_window_top1_ratio_12, 24 | /* count_where pattern: counts window rows whose CALL_TYPE equals the current row's value, NULL-guarded; at(col, 0) presumably reads the current row's value — confirm against the engine's docs */ case when !isnull(at(`CALL_TYPE`, 0)) over train_TAXI_ID_TIMESTAMP_0s_14d_100 then count_where(`CALL_TYPE`, `CALL_TYPE` = at(`CALL_TYPE`, 0)) over train_TAXI_ID_TIMESTAMP_0s_14d_100 else null end as 
train_CALL_TYPE_window_count_13, 25 | case when !isnull(at(`CALL_TYPE`, 0)) over train_TAXI_ID_TIMESTAMP_0s_64d_100 then count_where(`CALL_TYPE`, `CALL_TYPE` = at(`CALL_TYPE`, 0)) over train_TAXI_ID_TIMESTAMP_0s_64d_100 else null end as train_CALL_TYPE_window_count_14, 26 | distinct_count(`TAXI_ID`) over train_CALL_TYPE_TIMESTAMP_0s_64d_200 as train_TAXI_ID_window_unique_count_15, 27 | `CALL_TYPE` as train_CALL_TYPE_combine_16, 28 | `ORIGIN_CALL` as train_ORIGIN_CALL_combine_16, 29 | `POLYLINE` as train_POLYLINE_combine_16, 30 | case when !isnull(at(`CALL_TYPE`, 0)) over train_ORIGIN_CALL_TIMESTAMP_0s_7d_200 then count_where(`CALL_TYPE`, `CALL_TYPE` = at(`CALL_TYPE`, 0)) over train_ORIGIN_CALL_TIMESTAMP_0s_7d_200 else null end as train_CALL_TYPE_window_count_17, 31 | case when !isnull(at(`CALL_TYPE`, 0)) over train_ORIGIN_CALL_TIMESTAMP_0s_10h_200 then count_where(`CALL_TYPE`, `CALL_TYPE` = at(`CALL_TYPE`, 0)) over train_ORIGIN_CALL_TIMESTAMP_0s_10h_200 else null end as train_CALL_TYPE_window_count_18, 32 | fz_top1_ratio(`CALL_TYPE`) over train_ORIGIN_CALL_TIMESTAMP_0s_2d_200 as train_CALL_TYPE_window_top1_ratio_19, 33 | fz_top1_ratio(`CALL_TYPE`) over train_ORIGIN_CALL_TIMESTAMP_0s_5h_100 as train_CALL_TYPE_window_top1_ratio_20, 34 | case when !isnull(at(`TAXI_ID`, 0)) over train_ORIGIN_CALL_TIMESTAMP_0s_64d_200 then count_where(`TAXI_ID`, `TAXI_ID` = at(`TAXI_ID`, 0)) over train_ORIGIN_CALL_TIMESTAMP_0s_64d_200 else null end as train_TAXI_ID_window_count_21, 35 | case when !isnull(at(`TAXI_ID`, 0)) over train_ORIGIN_CALL_TIMESTAMP_0s_10h_200 then count_where(`TAXI_ID`, `TAXI_ID` = at(`TAXI_ID`, 0)) over train_ORIGIN_CALL_TIMESTAMP_0s_10h_200 else null end as train_TAXI_ID_window_count_22, 36 | case when !isnull(at(`ORIGIN_CALL`, 0)) over train_TAXI_ID_TIMESTAMP_0s_5d_100 then count_where(`ORIGIN_CALL`, `ORIGIN_CALL` = at(`ORIGIN_CALL`, 0)) over train_TAXI_ID_TIMESTAMP_0s_5d_100 else null end as train_ORIGIN_CALL_window_count_23, 37 | fz_top1_ratio(`TAXI_ID`) over 
train_CALL_TYPE_TIMESTAMP_0s_64d_200 as train_TAXI_ID_window_top1_ratio_24 38 | from 39 | `train` 40 | /* named window definitions: non-standard rows_range time frames with MAXSIZE row caps (OpenMLDB-style syntax); "open preceding" presumably makes the far bound exclusive — confirm against engine docs */ window train_TAXI_ID_TIMESTAMP_0s_1h_200 as (partition by `TAXI_ID` order by `TIMESTAMP` rows_range between 1h open preceding and 0s preceding MAXSIZE 200), 41 | train_TAXI_ID_TIMESTAMP_0s_14d_100 as (partition by `TAXI_ID` order by `TIMESTAMP` rows_range between 14d open preceding and 0s preceding MAXSIZE 100), 42 | train_TAXI_ID_TIMESTAMP_0s_64d_100 as (partition by `TAXI_ID` order by `TIMESTAMP` rows_range between 64d open preceding and 0s preceding MAXSIZE 100), 43 | train_CALL_TYPE_TIMESTAMP_0s_64d_200 as (partition by `CALL_TYPE` order by `TIMESTAMP` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 44 | train_ORIGIN_CALL_TIMESTAMP_0s_7d_200 as (partition by `ORIGIN_CALL` order by `TIMESTAMP` rows_range between 7d open preceding and 0s preceding MAXSIZE 200), 45 | train_ORIGIN_CALL_TIMESTAMP_0s_10h_200 as (partition by `ORIGIN_CALL` order by `TIMESTAMP` rows_range between 10h open preceding and 0s preceding MAXSIZE 200), 46 | train_ORIGIN_CALL_TIMESTAMP_0s_2d_200 as (partition by `ORIGIN_CALL` order by `TIMESTAMP` rows_range between 2d open preceding and 0s preceding MAXSIZE 200), 47 | train_ORIGIN_CALL_TIMESTAMP_0s_5h_100 as (partition by `ORIGIN_CALL` order by `TIMESTAMP` rows_range between 5h open preceding and 0s preceding MAXSIZE 100), 48 | train_ORIGIN_CALL_TIMESTAMP_0s_64d_200 as (partition by `ORIGIN_CALL` order by `TIMESTAMP` rows_range between 64d open preceding and 0s preceding MAXSIZE 200), 49 | train_TAXI_ID_TIMESTAMP_0s_5d_100 as (partition by `TAXI_ID` order by `TIMESTAMP` rows_range between 5d open preceding and 0s preceding MAXSIZE 100); -------------------------------------------------------------------------------- /transport/taxi-trajectory/tables/taxi-trajectory_schema.sql: -------------------------------------------------------------------------------- 1 | /* taxi-trajectory source table; NOTE(review): the column named TIMESTAMP collides with the SQL type keyword and may need quoting in some engines */ CREATE TABLE train 2 | ( 3 | TRIP_ID BIGINT, 4 | CALL_TYPE 
VARCHAR, 5 | ORIGIN_CALL INT, 6 | ORIGIN_STAND INT, 7 | TAXI_ID INT, 8 | TIMESTAMP TIMESTAMP, 9 | DAY_TYPE VARCHAR, 10 | MISSING_DATA VARCHAR, 11 | POLYLINE VARCHAR 12 | ); 13 | --------------------------------------------------------------------------------