├── .gitignore ├── README.md ├── ddl-tpcds ├── bin_partitioned │ ├── analyze.sql │ ├── analyze_call_center.sql.sql │ ├── analyze_catalog_page.sql.sql │ ├── analyze_catalog_returns.sql.sql │ ├── analyze_catalog_sales.sql.sql │ ├── analyze_customer.sql.sql │ ├── analyze_customer_address.sql.sql │ ├── analyze_customer_demographics.sql.sql │ ├── analyze_date_dim.sql.sql │ ├── analyze_everything.sql │ ├── analyze_household_demographics.sql.sql │ ├── analyze_income_band.sql.sql │ ├── analyze_inventory.sql.sql │ ├── analyze_item.sql.sql │ ├── analyze_promotion.sql.sql │ ├── analyze_reason.sql.sql │ ├── analyze_ship_mode.sql.sql │ ├── analyze_store.sql.sql │ ├── analyze_store_returns.sql.sql │ ├── analyze_store_sales.sql.sql │ ├── analyze_time_dim.sql.sql │ ├── analyze_warehouse.sql.sql │ ├── analyze_web_page.sql.sql │ ├── analyze_web_returns.sql.sql │ ├── analyze_web_sales.sql.sql │ ├── analyze_web_site.sql.sql │ ├── call_center.sql │ ├── catalog_page.sql │ ├── catalog_returns.sql │ ├── catalog_sales.sql │ ├── customer.sql │ ├── customer_address.sql │ ├── customer_demographics.sql │ ├── date_dim.sql │ ├── generate_analyze.pl │ ├── household_demographics.sql │ ├── income_band.sql │ ├── inventory.sql │ ├── item.sql │ ├── promotion.sql │ ├── reason.sql │ ├── ship_mode.sql │ ├── store.sql │ ├── store_returns.sql │ ├── store_sales.sql │ ├── time_dim.sql │ ├── warehouse.sql │ ├── web_page.sql │ ├── web_returns.sql │ ├── web_sales.sql │ └── web_site.sql └── text │ ├── alltables.sql │ ├── analyze_catalog_sales.sql │ ├── analyze_everything.sql │ ├── analyze_store_returns.sql │ ├── analyze_store_sales.sql │ ├── call_center.sql │ ├── catalog_page.sql │ ├── catalog_returns.sql │ ├── catalog_sales.sql │ ├── customer.sql │ ├── customer_address.sql │ ├── customer_demographics.sql │ ├── date_dim.sql │ ├── generate_analyze.pl │ ├── household_demographics.sql │ ├── income_band.sql │ ├── inventory.sql │ ├── item.sql │ ├── promotion.sql │ ├── reason.sql │ ├── ship_mode.sql │ ├── store.sql │ 
├── store_returns.sql │ ├── store_sales.sql │ ├── temp.pl │ ├── time_dim.sql │ ├── uberscript.txt │ ├── warehouse.sql │ ├── web_page.sql │ ├── web_returns.sql │ ├── web_sales.sql │ └── web_site.sql ├── ddl-tpch └── bin_flat │ ├── alltables.sql │ ├── customer.sql │ ├── lineitem.sql │ ├── nation.sql │ ├── orders.sql │ ├── part.sql │ ├── partsupp.sql │ ├── region.sql │ └── supplier.sql ├── runSuite.pl ├── sample-queries-tpcds ├── README.md ├── query12.sql ├── query13.sql ├── query15.sql ├── query17.sql ├── query18.sql ├── query19.sql ├── query20.sql ├── query21.sql ├── query22.sql ├── query24.sql ├── query25.sql ├── query26.sql ├── query27.sql ├── query28.sql ├── query29.sql ├── query3.sql ├── query31.sql ├── query32.sql ├── query34.sql ├── query39.sql ├── query40.sql ├── query42.sql ├── query43.sql ├── query45.sql ├── query46.sql ├── query48.sql ├── query49.sql ├── query50.sql ├── query51.sql ├── query52.sql ├── query54.sql ├── query55.sql ├── query56.sql ├── query58.sql ├── query60.sql ├── query64.sql ├── query66.sql ├── query67.sql ├── query68.sql ├── query7.sql ├── query70.sql ├── query71.sql ├── query72.sql ├── query73.sql ├── query75.sql ├── query76.sql ├── query79.sql ├── query80.sql ├── query82.sql ├── query83.sql ├── query84.sql ├── query85.sql ├── query87.sql ├── query88.sql ├── query89.sql ├── query90.sql ├── query91.sql ├── query92.sql ├── query93.sql ├── query94.sql ├── query95.sql ├── query96.sql ├── query97.sql ├── query98.sql ├── testbench-withATS.settings └── testbench.settings ├── sample-queries-tpch ├── README.md ├── testbench-withATS.settings ├── testbench.settings ├── tpch_query1.sql ├── tpch_query10.sql ├── tpch_query11.sql ├── tpch_query12.sql ├── tpch_query13.sql ├── tpch_query14.sql ├── tpch_query15.sql ├── tpch_query16.sql ├── tpch_query17.sql ├── tpch_query18.sql ├── tpch_query19.sql ├── tpch_query2.sql ├── tpch_query20.sql ├── tpch_query21.sql ├── tpch_query22.sql ├── tpch_query3.sql ├── tpch_query4.sql ├── tpch_query5.sql ├── 
tpch_query6.sql ├── tpch_query7.sql ├── tpch_query8.sql └── tpch_query9.sql ├── settings ├── init.sql ├── load-flat.sql └── load-partitioned.sql ├── tpcds-build.sh ├── tpcds-gen ├── Makefile ├── README.md ├── patches │ ├── Darwin │ │ └── macosx.patch │ └── all │ │ ├── tpcds-buffered.patch │ │ ├── tpcds-strcpy.patch │ │ └── tpcds_misspelled_header_guard.patch ├── pom.xml └── src │ └── main │ └── java │ └── org │ └── notmysock │ └── tpcds │ └── GenTable.java ├── tpcds-setup.sh ├── tpch-build.sh ├── tpch-gen ├── Makefile ├── README.md ├── ddl │ ├── orc.sql │ └── text.sql ├── patches │ └── Darwin │ │ └── macosx.patch ├── pom.xml └── src │ └── main │ └── java │ └── org │ └── notmysock │ └── tpch │ └── GenTable.java └── tpch-setup.sh /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | tpcds_kit.zip 3 | tpch_kit.zip 4 | *.sql.log 5 | derby.log 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RETIRED 2 | 3 | This repository is moved to the [Hortonworks GitHub](https://github.com/hortonworks/hive-testbench). 4 | 5 | Make pull requests against that repository. 6 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_call_center.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 
4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE call_center COMPUTE STATISTICS; 7 | ANALYZE TABLE call_center COMPUTE STATISTICS FOR COLUMNS 8 | cc_call_center_sk, cc_call_center_id, cc_rec_start_date, 9 | cc_rec_end_date, cc_closed_date_sk, cc_open_date_sk, cc_name, 10 | cc_class, cc_employees, cc_sq_ft, cc_hours, cc_manager, 11 | cc_mkt_id, cc_mkt_class, cc_mkt_desc, cc_market_manager, 12 | cc_division, cc_division_name, cc_company, cc_company_name, 13 | cc_street_number, cc_street_name, cc_street_type, 14 | cc_suite_number, cc_city, cc_county, cc_state, cc_zip, 15 | cc_country, cc_gmt_offset, cc_tax_percentage; 16 | 17 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_catalog_page.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE catalog_page COMPUTE STATISTICS; 7 | ANALYZE TABLE catalog_page COMPUTE STATISTICS FOR COLUMNS 8 | cp_catalog_page_sk, cp_catalog_page_id, cp_start_date_sk, 9 | cp_end_date_sk, cp_department, cp_catalog_number, 10 | cp_catalog_page_number, cp_description, cp_type; 11 | 12 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_customer.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 
4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE customer COMPUTE STATISTICS; 7 | ANALYZE TABLE customer COMPUTE STATISTICS FOR COLUMNS 8 | c_customer_sk, c_customer_id, c_current_cdemo_sk, 9 | c_current_hdemo_sk, c_current_addr_sk, c_first_shipto_date_sk, 10 | c_first_sales_date_sk, c_salutation, c_first_name, c_last_name, 11 | c_preferred_cust_flag, c_birth_day, c_birth_month, 12 | c_birth_year, c_birth_country, c_login, c_email_address, 13 | c_last_review_date; 14 | 15 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_customer_address.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE customer_address COMPUTE STATISTICS; 7 | ANALYZE TABLE customer_address COMPUTE STATISTICS FOR COLUMNS 8 | ca_address_sk, ca_address_id, ca_street_number, ca_street_name, 9 | ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, 10 | ca_zip, ca_country, ca_gmt_offset, ca_location_type; 11 | 12 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_customer_demographics.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 
4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE customer_demographics COMPUTE STATISTICS; 7 | ANALYZE TABLE customer_demographics COMPUTE STATISTICS FOR COLUMNS 8 | cd_demo_sk, cd_gender, cd_marital_status, cd_education_status, 9 | cd_purchase_estimate, cd_credit_rating, cd_dep_count, 10 | cd_dep_employed_count, cd_dep_college_count; 11 | 12 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_date_dim.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE date_dim COMPUTE STATISTICS; 7 | ANALYZE TABLE date_dim COMPUTE STATISTICS FOR COLUMNS 8 | d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq, 9 | d_quarter_seq, d_year, d_dow, d_moy, d_dom, d_qoy, d_fy_year, 10 | d_fy_quarter_seq, d_fy_week_seq, d_day_name, d_quarter_name, 11 | d_holiday, d_weekend, d_following_holiday, d_first_dom, 12 | d_last_dom, d_same_day_ly, d_same_day_lq, d_current_day, 13 | d_current_week, d_current_month, d_current_quarter, 14 | d_current_year; 15 | 16 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_household_demographics.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 
4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE household_demographics COMPUTE STATISTICS; 7 | ANALYZE TABLE household_demographics COMPUTE STATISTICS FOR COLUMNS 8 | hd_demo_sk, hd_income_band_sk, hd_buy_potential, hd_dep_count, 9 | hd_vehicle_count; 10 | 11 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_income_band.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE income_band COMPUTE STATISTICS; 7 | ANALYZE TABLE income_band COMPUTE STATISTICS FOR COLUMNS 8 | ib_income_band_sk, ib_lower_bound, ib_upper_bound; 9 | 10 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_item.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE item COMPUTE STATISTICS; 7 | ANALYZE TABLE item COMPUTE STATISTICS FOR COLUMNS 8 | i_item_sk, i_item_id, i_rec_start_date, i_rec_end_date, 9 | i_item_desc, i_current_price, i_wholesale_cost, i_brand_id, 10 | i_brand, i_class_id, i_class, i_category_id, i_category, 11 | i_manufact_id, i_manufact, i_size, i_formulation, i_color, 12 | i_units, i_container, i_manager_id, i_product_name; 13 | 14 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_promotion.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 
2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE promotion COMPUTE STATISTICS; 7 | ANALYZE TABLE promotion COMPUTE STATISTICS FOR COLUMNS 8 | p_promo_sk, p_promo_id, p_start_date_sk, p_end_date_sk, 9 | p_item_sk, p_cost, p_response_target, p_promo_name, 10 | p_channel_dmail, p_channel_email, p_channel_catalog, 11 | p_channel_tv, p_channel_radio, p_channel_press, 12 | p_channel_event, p_channel_demo, p_channel_details, p_purpose, 13 | p_discount_active; 14 | 15 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_reason.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE reason COMPUTE STATISTICS; 7 | ANALYZE TABLE reason COMPUTE STATISTICS FOR COLUMNS 8 | r_reason_sk, r_reason_id, r_reason_desc; 9 | 10 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_ship_mode.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 
4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE ship_mode COMPUTE STATISTICS; 7 | ANALYZE TABLE ship_mode COMPUTE STATISTICS FOR COLUMNS 8 | sm_ship_mode_sk, sm_ship_mode_id, sm_type, sm_code, sm_carrier, 9 | sm_contract; 10 | 11 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_store.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE store COMPUTE STATISTICS; 7 | ANALYZE TABLE store COMPUTE STATISTICS FOR COLUMNS 8 | s_store_sk, s_store_id, s_rec_start_date, s_rec_end_date, 9 | s_closed_date_sk, s_store_name, s_number_employees, 10 | s_floor_space, s_hours, s_manager, s_market_id, 11 | s_geography_class, s_market_desc, s_market_manager, 12 | s_division_id, s_division_name, s_company_id, s_company_name, 13 | s_street_number, s_street_name, s_street_type, s_suite_number, 14 | s_city, s_county, s_state, s_zip, s_country, s_gmt_offset, 15 | s_tax_precentage; 16 | 17 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_time_dim.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 
4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE time_dim COMPUTE STATISTICS; 7 | ANALYZE TABLE time_dim COMPUTE STATISTICS FOR COLUMNS 8 | t_time_sk, t_time_id, t_time, t_hour, t_minute, t_second, 9 | t_am_pm, t_shift, t_sub_shift, t_meal_time; 10 | 11 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_warehouse.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE warehouse COMPUTE STATISTICS; 7 | ANALYZE TABLE warehouse COMPUTE STATISTICS FOR COLUMNS 8 | w_warehouse_sk, w_warehouse_id, w_warehouse_name, 9 | w_warehouse_sq_ft, w_street_number, w_street_name, 10 | w_street_type, w_suite_number, w_city, w_county, w_state, 11 | w_zip, w_country, w_gmt_offset; 12 | 13 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_web_page.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 
4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE web_page COMPUTE STATISTICS; 7 | ANALYZE TABLE web_page COMPUTE STATISTICS FOR COLUMNS 8 | wp_web_page_sk, wp_web_page_id, wp_rec_start_date, 9 | wp_rec_end_date, wp_creation_date_sk, wp_access_date_sk, 10 | wp_autogen_flag, wp_customer_sk, wp_url, wp_type, 11 | wp_char_count, wp_link_count, wp_image_count, wp_max_ad_count; 12 | 13 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/analyze_web_site.sql.sql: -------------------------------------------------------------------------------- 1 | -- Use filesystem to track stats. 2 | set hive.stats.dbclass=fs; 3 | -- Many tables have some missing partitions, deal with this by ignoring errors. 4 | set hive.cli.errors.ignore=true; 5 | 6 | ANALYZE TABLE web_site COMPUTE STATISTICS; 7 | ANALYZE TABLE web_site COMPUTE STATISTICS FOR COLUMNS 8 | web_site_sk, web_site_id, web_rec_start_date, web_rec_end_date, 9 | web_name, web_open_date_sk, web_close_date_sk, web_class, 10 | web_manager, web_mkt_id, web_mkt_class, web_mkt_desc, 11 | web_market_manager, web_company_id, web_company_name, 12 | web_street_number, web_street_name, web_street_type, 13 | web_suite_number, web_city, web_county, web_state, web_zip, 14 | web_country, web_gmt_offset, web_tax_percentage; 15 | 16 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/call_center.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists call_center; 5 | 6 | create table call_center 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.call_center; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/catalog_page.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 
2 | use ${DB}; 3 | 4 | drop table if exists catalog_page; 5 | 6 | create table catalog_page 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.catalog_page; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/catalog_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_returns; 5 | 6 | create table catalog_returns 7 | ( 8 | cr_returned_date_sk int, 9 | cr_returned_time_sk int, 10 | cr_item_sk int, 11 | cr_refunded_customer_sk int, 12 | cr_refunded_cdemo_sk int, 13 | cr_refunded_hdemo_sk int, 14 | cr_refunded_addr_sk int, 15 | cr_returning_customer_sk int, 16 | cr_returning_cdemo_sk int, 17 | cr_returning_hdemo_sk int, 18 | cr_returning_addr_sk int, 19 | cr_call_center_sk int, 20 | cr_catalog_page_sk int, 21 | cr_ship_mode_sk int, 22 | cr_warehouse_sk int, 23 | cr_reason_sk int, 24 | cr_order_number int, 25 | cr_return_quantity int, 26 | cr_return_amount float, 27 | cr_return_tax float, 28 | cr_return_amt_inc_tax float, 29 | cr_fee float, 30 | cr_return_ship_cost float, 31 | cr_refunded_cash float, 32 | cr_reversed_charge float, 33 | cr_store_credit float, 34 | cr_net_loss float 35 | ) 36 | partitioned by (cr_returned_date string) 37 | stored as ${FILE}; 38 | 39 | insert overwrite table catalog_returns partition (cr_returned_date) 40 | select 41 | cr.cr_returned_date_sk, 42 | cr.cr_returned_time_sk, 43 | cr.cr_item_sk, 44 | cr.cr_refunded_customer_sk, 45 | cr.cr_refunded_cdemo_sk, 46 | cr.cr_refunded_hdemo_sk, 47 | cr.cr_refunded_addr_sk, 48 | cr.cr_returning_customer_sk, 49 | cr.cr_returning_cdemo_sk, 50 | cr.cr_returning_hdemo_sk, 51 | cr.cr_returning_addr_sk, 52 | cr.cr_call_center_sk, 53 | cr.cr_catalog_page_sk, 54 | cr.cr_ship_mode_sk, 55 | cr.cr_warehouse_sk, 56 | cr.cr_reason_sk, 57 | cr.cr_order_number, 58 | cr.cr_return_quantity, 59 | cr.cr_return_amount, 60 | 
cr.cr_return_tax, 61 | cr.cr_return_amt_inc_tax, 62 | cr.cr_fee, 63 | cr.cr_return_ship_cost, 64 | cr.cr_refunded_cash, 65 | cr.cr_reversed_charge, 66 | cr.cr_store_credit, 67 | cr.cr_net_loss, 68 | dd.d_date as cr_returned_date 69 | from ${SOURCE}.catalog_returns cr 70 | join ${SOURCE}.date_dim dd 71 | on (cr.cr_returned_date_sk = dd.d_date_sk); 72 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/catalog_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_sales; 5 | 6 | create table catalog_sales 7 | ( 8 | cs_sold_date_sk int, 9 | cs_sold_time_sk int, 10 | cs_ship_date_sk int, 11 | cs_bill_customer_sk int, 12 | cs_bill_cdemo_sk int, 13 | cs_bill_hdemo_sk int, 14 | cs_bill_addr_sk int, 15 | cs_ship_customer_sk int, 16 | cs_ship_cdemo_sk int, 17 | cs_ship_hdemo_sk int, 18 | cs_ship_addr_sk int, 19 | cs_call_center_sk int, 20 | cs_catalog_page_sk int, 21 | cs_ship_mode_sk int, 22 | cs_warehouse_sk int, 23 | cs_item_sk int, 24 | cs_promo_sk int, 25 | cs_order_number int, 26 | cs_quantity int, 27 | cs_wholesale_cost float, 28 | cs_list_price float, 29 | cs_sales_price float, 30 | cs_ext_discount_amt float, 31 | cs_ext_sales_price float, 32 | cs_ext_wholesale_cost float, 33 | cs_ext_list_price float, 34 | cs_ext_tax float, 35 | cs_coupon_amt float, 36 | cs_ext_ship_cost float, 37 | cs_net_paid float, 38 | cs_net_paid_inc_tax float, 39 | cs_net_paid_inc_ship float, 40 | cs_net_paid_inc_ship_tax float, 41 | cs_net_profit float 42 | ) 43 | partitioned by (cs_sold_date string) 44 | stored as ${FILE}; 45 | 46 | insert overwrite table catalog_sales partition (cs_sold_date) 47 | select 48 | cs.cs_sold_date_sk, 49 | cs.cs_sold_time_sk, 50 | cs.cs_ship_date_sk, 51 | cs.cs_bill_customer_sk, 52 | cs.cs_bill_cdemo_sk, 53 | cs.cs_bill_hdemo_sk, 54 | cs.cs_bill_addr_sk, 55 | 
cs.cs_ship_customer_sk, 56 | cs.cs_ship_cdemo_sk, 57 | cs.cs_ship_hdemo_sk, 58 | cs.cs_ship_addr_sk, 59 | cs.cs_call_center_sk, 60 | cs.cs_catalog_page_sk, 61 | cs.cs_ship_mode_sk, 62 | cs.cs_warehouse_sk, 63 | cs.cs_item_sk, 64 | cs.cs_promo_sk, 65 | cs.cs_order_number, 66 | cs.cs_quantity, 67 | cs.cs_wholesale_cost, 68 | cs.cs_list_price, 69 | cs.cs_sales_price, 70 | cs.cs_ext_discount_amt, 71 | cs.cs_ext_sales_price, 72 | cs.cs_ext_wholesale_cost, 73 | cs.cs_ext_list_price, 74 | cs.cs_ext_tax, 75 | cs.cs_coupon_amt, 76 | cs.cs_ext_ship_cost, 77 | cs.cs_net_paid, 78 | cs.cs_net_paid_inc_tax, 79 | cs.cs_net_paid_inc_ship, 80 | cs.cs_net_paid_inc_ship_tax, 81 | cs.cs_net_profit, 82 | dd.d_date as cs_sold_date 83 | from ${SOURCE}.catalog_sales cs 84 | join ${SOURCE}.date_dim dd 85 | on (cs.cs_sold_date_sk = dd.d_date_sk); 86 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/customer.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer; 5 | 6 | create table customer 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/customer_address.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer_address; 5 | 6 | create table customer_address 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer_address; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/customer_demographics.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer_demographics; 5 | 6 | create 
table customer_demographics 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer_demographics; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/date_dim.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists date_dim; 5 | 6 | create table date_dim 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.date_dim; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/generate_analyze.pl: -------------------------------------------------------------------------------- 1 | use Text::Wrap; 2 | 3 | use DateTime; 4 | use DateTime::Format::Strptime; 5 | 6 | $Text::Wrap::columns = 72; 7 | 8 | print "-- Use filesystem to track stats.\n"; 9 | print "set hive.stats.dbclass=fs;\n"; 10 | print "-- Many tables have some missing partitions, deal with this by ignoring errors.\n"; 11 | print "set hive.cli.errors.ignore=true;\n\n"; 12 | 13 | %partitions = ( 14 | catalog_returns => "cr_returned_date", 15 | catalog_sales => "cs_sold_date", 16 | inventory => "inv_date", 17 | store_returns => "sr_returned_date", 18 | store_sales => "ss_sold_date", 19 | web_returns => "wr_returned_date", 20 | web_sales => "ws_sold_date", 21 | ); 22 | 23 | open(TEMP, $ARGV[0]); 24 | $old = $/; 25 | $/ = undef; 26 | $x = <TEMP>; 27 | $x =~ m|l table (\S+)|; 28 | $table = $1; 29 | $table =~ s/\(//g; 30 | 31 | if ($partitions{$table}) { 32 | $partition = " partition(" . $partitions{$table} . 
")"; 33 | } 34 | 35 | open(TEMP, $ARGV[0]); 36 | $/ = $old; 37 | while ($_ = <TEMP>) { 38 | if ($_ =~ m|^[ ,]{4,6}(\S+)|) { 39 | push(@stuff, "$1"); 40 | } 41 | } 42 | $columns = join(', ', @stuff); 43 | @stuff = split('\s', $columns); 44 | print "ANALYZE TABLE $table$partition COMPUTE STATISTICS;\n"; 45 | 46 | $date = "1998-01-01"; 47 | my $strp = DateTime::Format::Strptime->new( 48 | pattern => '%Y-%m-%d' 49 | ); 50 | $dt = $strp->parse_datetime($date); 51 | 52 | if ($partitions{$table}) { 53 | $year = "1998"; 54 | while ($year < 2003) { 55 | $partitionv = "'" . $dt->strftime("%Y-%m-%d") . "'"; 56 | $partition = " partition(" . $partitions{$table} . "=$partitionv)"; 57 | print "!echo Analyzing $table $partitionv;\n"; 58 | print "ANALYZE TABLE $table$partition COMPUTE STATISTICS FOR COLUMNS \n"; 59 | print wrap("\t", "\t", @stuff); 60 | print ";\n\n"; 61 | $dt->add(days => 1); 62 | $year = $dt->strftime("%Y"); 63 | } 64 | } else { 65 | print "ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS \n"; 66 | print wrap("\t", "\t", @stuff); 67 | print ";\n\n"; 68 | } 69 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/household_demographics.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists household_demographics; 5 | 6 | create table household_demographics 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.household_demographics; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/income_band.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists income_band; 5 | 6 | create table income_band 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.income_band; 9 | 
-------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/inventory.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists inventory; 5 | 6 | create table inventory 7 | ( 8 | inv_date_sk int, 9 | inv_item_sk int, 10 | inv_warehouse_sk int, 11 | inv_quantity_on_hand int 12 | ) 13 | partitioned by (inv_date string) 14 | stored as ${FILE}; 15 | 16 | insert overwrite table inventory partition (inv_date) 17 | select 18 | i.inv_date_sk, 19 | i.inv_item_sk, 20 | i.inv_warehouse_sk, 21 | i.inv_quantity_on_hand, 22 | d.d_date as inv_date 23 | from ${SOURCE}.inventory i 24 | join ${SOURCE}.date_dim d 25 | on (d.d_date_sk = i.inv_date_sk); 26 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/item.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists item; 5 | 6 | create table item 7 | ( 8 | i_item_sk int, 9 | i_item_id string, 10 | i_rec_start_date string, 11 | i_rec_end_date string, 12 | i_item_desc string, 13 | i_current_price float, 14 | i_wholesale_cost float, 15 | i_brand_id int, 16 | i_brand string, 17 | i_class_id int, 18 | i_class string, 19 | i_category_id int, 20 | i_category string, 21 | i_manufact_id int, 22 | i_manufact string, 23 | i_size string, 24 | i_formulation string, 25 | i_color string, 26 | i_units string, 27 | i_container string, 28 | i_manager_id int, 29 | i_product_name string 30 | ) 31 | stored as ${FILE}; 32 | 33 | insert overwrite table item 34 | select * from ${SOURCE}.item; 35 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/promotion.sql: -------------------------------------------------------------------------------- 1 | create database if not 
exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists promotion; 5 | 6 | create table promotion 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.promotion; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/reason.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists reason; 5 | 6 | create table reason 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.reason; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/ship_mode.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists ship_mode; 5 | 6 | create table ship_mode 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.ship_mode; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/store.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store; 5 | 6 | create table store 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.store; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/store_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store_returns; 5 | 6 | create table store_returns 7 | ( 8 | sr_returned_date_sk int, 9 | sr_return_time_sk int, 10 | sr_item_sk int, 11 | sr_customer_sk int, 12 | sr_cdemo_sk int, 13 | sr_hdemo_sk int, 14 | sr_addr_sk int, 15 | sr_store_sk int, 16 | sr_reason_sk int, 17 | sr_ticket_number int, 18 | sr_return_quantity int, 19 | sr_return_amt float, 20 | sr_return_tax 
float, 21 | sr_return_amt_inc_tax float, 22 | sr_fee float, 23 | sr_return_ship_cost float, 24 | sr_refunded_cash float, 25 | sr_reversed_charge float, 26 | sr_store_credit float, 27 | sr_net_loss float 28 | ) 29 | partitioned by (sr_returned_date string) 30 | stored as ${FILE}; 31 | 32 | insert overwrite table store_returns partition (sr_returned_date) 33 | select 34 | sr.sr_returned_date_sk, 35 | sr.sr_return_time_sk, 36 | sr.sr_item_sk, 37 | sr.sr_customer_sk, 38 | sr.sr_cdemo_sk, 39 | sr.sr_hdemo_sk, 40 | sr.sr_addr_sk, 41 | sr.sr_store_sk, 42 | sr.sr_reason_sk, 43 | sr.sr_ticket_number, 44 | sr.sr_return_quantity, 45 | sr.sr_return_amt, 46 | sr.sr_return_tax, 47 | sr.sr_return_amt_inc_tax, 48 | sr.sr_fee, 49 | sr.sr_return_ship_cost, 50 | sr.sr_refunded_cash, 51 | sr.sr_reversed_charge, 52 | sr.sr_store_credit, 53 | sr.sr_net_loss, 54 | dd.d_date as sr_returned_date 55 | from ${SOURCE}.store_returns sr 56 | join ${SOURCE}.date_dim dd 57 | on (sr.sr_returned_date_sk = dd.d_date_sk); 58 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/store_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store_sales; 5 | 6 | create table store_sales 7 | ( 8 | ss_sold_date_sk int, 9 | ss_sold_time_sk int, 10 | ss_item_sk int, 11 | ss_customer_sk int, 12 | ss_cdemo_sk int, 13 | ss_hdemo_sk int, 14 | ss_addr_sk int, 15 | ss_store_sk int, 16 | ss_promo_sk int, 17 | ss_ticket_number int, 18 | ss_quantity int, 19 | ss_wholesale_cost float, 20 | ss_list_price float, 21 | ss_sales_price float, 22 | ss_ext_discount_amt float, 23 | ss_ext_sales_price float, 24 | ss_ext_wholesale_cost float, 25 | ss_ext_list_price float, 26 | ss_ext_tax float, 27 | ss_coupon_amt float, 28 | ss_net_paid float, 29 | ss_net_paid_inc_tax float, 30 | ss_net_profit float 31 | ) 32 | partitioned by (ss_sold_date string) 33 | 
stored as ${FILE}; 34 | 35 | insert overwrite table store_sales partition (ss_sold_date) 36 | select 37 | ss.ss_sold_date_sk, 38 | ss.ss_sold_time_sk, 39 | ss.ss_item_sk, 40 | ss.ss_customer_sk, 41 | ss.ss_cdemo_sk, 42 | ss.ss_hdemo_sk, 43 | ss.ss_addr_sk, 44 | ss.ss_store_sk, 45 | ss.ss_promo_sk, 46 | ss.ss_ticket_number, 47 | ss.ss_quantity, 48 | ss.ss_wholesale_cost, 49 | ss.ss_list_price, 50 | ss.ss_sales_price, 51 | ss.ss_ext_discount_amt, 52 | ss.ss_ext_sales_price, 53 | ss.ss_ext_wholesale_cost, 54 | ss.ss_ext_list_price, 55 | ss.ss_ext_tax, 56 | ss.ss_coupon_amt, 57 | ss.ss_net_paid, 58 | ss.ss_net_paid_inc_tax, 59 | ss.ss_net_profit, 60 | dd.d_date as ss_sold_date 61 | from ${SOURCE}.store_sales ss 62 | join ${SOURCE}.date_dim dd 63 | on (ss.ss_sold_date_sk = dd.d_date_sk); 64 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/time_dim.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists time_dim; 5 | 6 | create table time_dim 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.time_dim; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/warehouse.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists warehouse; 5 | 6 | create table warehouse 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.warehouse; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/web_page.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_page; 5 | 6 | create table web_page 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_page; 9 | 
-------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/web_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_returns; 5 | 6 | create table web_returns 7 | ( 8 | wr_returned_date_sk int, 9 | wr_returned_time_sk int, 10 | wr_item_sk int, 11 | wr_refunded_customer_sk int, 12 | wr_refunded_cdemo_sk int, 13 | wr_refunded_hdemo_sk int, 14 | wr_refunded_addr_sk int, 15 | wr_returning_customer_sk int, 16 | wr_returning_cdemo_sk int, 17 | wr_returning_hdemo_sk int, 18 | wr_returning_addr_sk int, 19 | wr_web_page_sk int, 20 | wr_reason_sk int, 21 | wr_order_number int, 22 | wr_return_quantity int, 23 | wr_return_amt float, 24 | wr_return_tax float, 25 | wr_return_amt_inc_tax float, 26 | wr_fee float, 27 | wr_return_ship_cost float, 28 | wr_refunded_cash float, 29 | wr_reversed_charge float, 30 | wr_account_credit float, 31 | wr_net_loss float 32 | ) 33 | partitioned by (wr_returned_date string) 34 | stored as ${FILE}; 35 | 36 | insert overwrite table web_returns partition (wr_returned_date) 37 | select 38 | wr.wr_returned_date_sk, 39 | wr.wr_returned_time_sk, 40 | wr.wr_item_sk, 41 | wr.wr_refunded_customer_sk, 42 | wr.wr_refunded_cdemo_sk, 43 | wr.wr_refunded_hdemo_sk, 44 | wr.wr_refunded_addr_sk, 45 | wr.wr_returning_customer_sk, 46 | wr.wr_returning_cdemo_sk, 47 | wr.wr_returning_hdemo_sk, 48 | wr.wr_returning_addr_sk, 49 | wr.wr_web_page_sk, 50 | wr.wr_reason_sk, 51 | wr.wr_order_number, 52 | wr.wr_return_quantity, 53 | wr.wr_return_amt, 54 | wr.wr_return_tax, 55 | wr.wr_return_amt_inc_tax, 56 | wr.wr_fee, 57 | wr.wr_return_ship_cost, 58 | wr.wr_refunded_cash, 59 | wr.wr_reversed_charge, 60 | wr.wr_account_credit, 61 | wr.wr_net_loss, 62 | dd.d_date as wr_returned_date 63 | from ${SOURCE}.web_returns wr 64 | join ${SOURCE}.date_dim dd 65 | on (wr.wr_returned_date_sk = dd.d_date_sk); 66 | 
-------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/web_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_sales; 5 | 6 | create table web_sales 7 | ( 8 | ws_sold_date_sk int, 9 | ws_sold_time_sk int, 10 | ws_ship_date_sk int, 11 | ws_item_sk int, 12 | ws_bill_customer_sk int, 13 | ws_bill_cdemo_sk int, 14 | ws_bill_hdemo_sk int, 15 | ws_bill_addr_sk int, 16 | ws_ship_customer_sk int, 17 | ws_ship_cdemo_sk int, 18 | ws_ship_hdemo_sk int, 19 | ws_ship_addr_sk int, 20 | ws_web_page_sk int, 21 | ws_web_site_sk int, 22 | ws_ship_mode_sk int, 23 | ws_warehouse_sk int, 24 | ws_promo_sk int, 25 | ws_order_number int, 26 | ws_quantity int, 27 | ws_wholesale_cost float, 28 | ws_list_price float, 29 | ws_sales_price float, 30 | ws_ext_discount_amt float, 31 | ws_ext_sales_price float, 32 | ws_ext_wholesale_cost float, 33 | ws_ext_list_price float, 34 | ws_ext_tax float, 35 | ws_coupon_amt float, 36 | ws_ext_ship_cost float, 37 | ws_net_paid float, 38 | ws_net_paid_inc_tax float, 39 | ws_net_paid_inc_ship float, 40 | ws_net_paid_inc_ship_tax float, 41 | ws_net_profit float 42 | ) 43 | partitioned by (ws_sold_date string) 44 | stored as ${FILE}; 45 | 46 | insert overwrite table web_sales partition (ws_sold_date) 47 | select 48 | ws.ws_sold_date_sk, 49 | ws.ws_sold_time_sk, 50 | ws.ws_ship_date_sk, 51 | ws.ws_item_sk, 52 | ws.ws_bill_customer_sk, 53 | ws.ws_bill_cdemo_sk, 54 | ws.ws_bill_hdemo_sk, 55 | ws.ws_bill_addr_sk, 56 | ws.ws_ship_customer_sk, 57 | ws.ws_ship_cdemo_sk, 58 | ws.ws_ship_hdemo_sk, 59 | ws.ws_ship_addr_sk, 60 | ws.ws_web_page_sk, 61 | ws.ws_web_site_sk, 62 | ws.ws_ship_mode_sk, 63 | ws.ws_warehouse_sk, 64 | ws.ws_promo_sk, 65 | ws.ws_order_number, 66 | ws.ws_quantity, 67 | ws.ws_wholesale_cost, 68 | ws.ws_list_price, 69 | ws.ws_sales_price, 70 | ws.ws_ext_discount_amt, 71 | 
ws.ws_ext_sales_price, 72 | ws.ws_ext_wholesale_cost, 73 | ws.ws_ext_list_price, 74 | ws.ws_ext_tax, 75 | ws.ws_coupon_amt, 76 | ws.ws_ext_ship_cost, 77 | ws.ws_net_paid, 78 | ws.ws_net_paid_inc_tax, 79 | ws.ws_net_paid_inc_ship, 80 | ws.ws_net_paid_inc_ship_tax, 81 | ws.ws_net_profit, 82 | dd.d_date as ws_sold_date 83 | from ${SOURCE}.web_sales ws 84 | join ${SOURCE}.date_dim dd 85 | on (ws.ws_sold_date_sk = dd.d_date_sk); 86 | -------------------------------------------------------------------------------- /ddl-tpcds/bin_partitioned/web_site.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_site; 5 | 6 | create table web_site 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_site; 9 | -------------------------------------------------------------------------------- /ddl-tpcds/text/call_center.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists call_center; 5 | 6 | create external table call_center( 7 | cc_call_center_sk int 8 | , cc_call_center_id string 9 | , cc_rec_start_date string 10 | , cc_rec_end_date string 11 | , cc_closed_date_sk int 12 | , cc_open_date_sk int 13 | , cc_name string 14 | , cc_class string 15 | , cc_employees int 16 | , cc_sq_ft int 17 | , cc_hours string 18 | , cc_manager string 19 | , cc_mkt_id int 20 | , cc_mkt_class string 21 | , cc_mkt_desc string 22 | , cc_market_manager string 23 | , cc_division int 24 | , cc_division_name string 25 | , cc_company int 26 | , cc_company_name string 27 | , cc_street_number string 28 | , cc_street_name string 29 | , cc_street_type string 30 | , cc_suite_number string 31 | , cc_city string 32 | , cc_county string 33 | , cc_state string 34 | , cc_zip string 35 | , cc_country string 36 | , cc_gmt_offset float 37 | , cc_tax_percentage float 38 | ) 39 | row 
format delimited fields terminated by '|' 40 | location '${LOCATION}'; 41 | -------------------------------------------------------------------------------- /ddl-tpcds/text/catalog_page.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_page; 5 | 6 | create external table catalog_page( 7 | cp_catalog_page_sk int 8 | , cp_catalog_page_id string 9 | , cp_start_date_sk int 10 | , cp_end_date_sk int 11 | , cp_department string 12 | , cp_catalog_number int 13 | , cp_catalog_page_number int 14 | , cp_description string 15 | , cp_type string 16 | ) 17 | row format delimited fields terminated by '|' 18 | location '${LOCATION}'; 19 | -------------------------------------------------------------------------------- /ddl-tpcds/text/catalog_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_returns; 5 | 6 | create external table catalog_returns 7 | ( 8 | cr_returned_date_sk int, 9 | cr_returned_time_sk int, 10 | cr_item_sk int, 11 | cr_refunded_customer_sk int, 12 | cr_refunded_cdemo_sk int, 13 | cr_refunded_hdemo_sk int, 14 | cr_refunded_addr_sk int, 15 | cr_returning_customer_sk int, 16 | cr_returning_cdemo_sk int, 17 | cr_returning_hdemo_sk int, 18 | cr_returning_addr_sk int, 19 | cr_call_center_sk int, 20 | cr_catalog_page_sk int, 21 | cr_ship_mode_sk int, 22 | cr_warehouse_sk int, 23 | cr_reason_sk int, 24 | cr_order_number int, 25 | cr_return_quantity int, 26 | cr_return_amount float, 27 | cr_return_tax float, 28 | cr_return_amt_inc_tax float, 29 | cr_fee float, 30 | cr_return_ship_cost float, 31 | cr_refunded_cash float, 32 | cr_reversed_charge float, 33 | cr_store_credit float, 34 | cr_net_loss float 35 | ) 36 | row format delimited fields terminated by '|' 37 | location '${LOCATION}'; 38 | 
-------------------------------------------------------------------------------- /ddl-tpcds/text/catalog_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_sales; 5 | 6 | create external table catalog_sales 7 | ( 8 | cs_sold_date_sk int, 9 | cs_sold_time_sk int, 10 | cs_ship_date_sk int, 11 | cs_bill_customer_sk int, 12 | cs_bill_cdemo_sk int, 13 | cs_bill_hdemo_sk int, 14 | cs_bill_addr_sk int, 15 | cs_ship_customer_sk int, 16 | cs_ship_cdemo_sk int, 17 | cs_ship_hdemo_sk int, 18 | cs_ship_addr_sk int, 19 | cs_call_center_sk int, 20 | cs_catalog_page_sk int, 21 | cs_ship_mode_sk int, 22 | cs_warehouse_sk int, 23 | cs_item_sk int, 24 | cs_promo_sk int, 25 | cs_order_number int, 26 | cs_quantity int, 27 | cs_wholesale_cost float, 28 | cs_list_price float, 29 | cs_sales_price float, 30 | cs_ext_discount_amt float, 31 | cs_ext_sales_price float, 32 | cs_ext_wholesale_cost float, 33 | cs_ext_list_price float, 34 | cs_ext_tax float, 35 | cs_coupon_amt float, 36 | cs_ext_ship_cost float, 37 | cs_net_paid float, 38 | cs_net_paid_inc_tax float, 39 | cs_net_paid_inc_ship float, 40 | cs_net_paid_inc_ship_tax float, 41 | cs_net_profit float 42 | ) 43 | row format delimited fields terminated by '|' 44 | location '${LOCATION}'; 45 | -------------------------------------------------------------------------------- /ddl-tpcds/text/customer.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer; 5 | 6 | create external table customer 7 | ( 8 | c_customer_sk int, 9 | c_customer_id string, 10 | c_current_cdemo_sk int, 11 | c_current_hdemo_sk int, 12 | c_current_addr_sk int, 13 | c_first_shipto_date_sk int, 14 | c_first_sales_date_sk int, 15 | c_salutation string, 16 | c_first_name string, 17 | c_last_name string, 18 | 
c_preferred_cust_flag string, 19 | c_birth_day int, 20 | c_birth_month int, 21 | c_birth_year int, 22 | c_birth_country string, 23 | c_login string, 24 | c_email_address string, 25 | c_last_review_date string 26 | ) 27 | row format delimited fields terminated by '|' 28 | location '${LOCATION}'; 29 | -------------------------------------------------------------------------------- /ddl-tpcds/text/customer_address.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer_address; 5 | 6 | create external table customer_address 7 | ( 8 | ca_address_sk int, 9 | ca_address_id string, 10 | ca_street_number string, 11 | ca_street_name string, 12 | ca_street_type string, 13 | ca_suite_number string, 14 | ca_city string, 15 | ca_county string, 16 | ca_state string, 17 | ca_zip string, 18 | ca_country string, 19 | ca_gmt_offset float, 20 | ca_location_type string 21 | ) 22 | row format delimited fields terminated by '|' 23 | location '${LOCATION}'; 24 | -------------------------------------------------------------------------------- /ddl-tpcds/text/customer_demographics.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer_demographics; 5 | 6 | create external table customer_demographics 7 | ( 8 | cd_demo_sk int, 9 | cd_gender string, 10 | cd_marital_status string, 11 | cd_education_status string, 12 | cd_purchase_estimate int, 13 | cd_credit_rating string, 14 | cd_dep_count int, 15 | cd_dep_employed_count int, 16 | cd_dep_college_count int 17 | ) 18 | row format delimited fields terminated by '|' 19 | location '${LOCATION}'; 20 | -------------------------------------------------------------------------------- /ddl-tpcds/text/date_dim.sql: -------------------------------------------------------------------------------- 1 | create 
database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists date_dim; 5 | 6 | create external table date_dim 7 | ( 8 | d_date_sk int, 9 | d_date_id string, 10 | d_date string, 11 | d_month_seq int, 12 | d_week_seq int, 13 | d_quarter_seq int, 14 | d_year int, 15 | d_dow int, 16 | d_moy int, 17 | d_dom int, 18 | d_qoy int, 19 | d_fy_year int, 20 | d_fy_quarter_seq int, 21 | d_fy_week_seq int, 22 | d_day_name string, 23 | d_quarter_name string, 24 | d_holiday string, 25 | d_weekend string, 26 | d_following_holiday string, 27 | d_first_dom int, 28 | d_last_dom int, 29 | d_same_day_ly int, 30 | d_same_day_lq int, 31 | d_current_day string, 32 | d_current_week string, 33 | d_current_month string, 34 | d_current_quarter string, 35 | d_current_year string 36 | ) 37 | row format delimited fields terminated by '|' 38 | location '${LOCATION}'; 39 | -------------------------------------------------------------------------------- /ddl-tpcds/text/generate_analyze.pl: -------------------------------------------------------------------------------- 1 | use Text::Wrap; # Usage: perl generate_analyze.pl TABLE.sql; reads the DDL file named by the first argument and prints ANALYZE statements for its table 2 | 3 | use DateTime; 4 | use DateTime::Format::Strptime; 5 | 6 | $Text::Wrap::columns = 72; 7 | 8 | print "-- Use filesystem to track stats.\n"; 9 | print "set hive.stats.dbclass=fs;\n"; 10 | print "-- Many tables have some missing partitions, deal with this by ignoring errors.\n"; 11 | print "set hive.cli.errors.ignore=true;\n\n"; 12 | 13 | %partitions = ( 14 | catalog_returns => "cr_returned_date", 15 | catalog_sales => "cs_sold_date", 16 | inventory => "inv_date", 17 | store_returns => "sr_returned_date", 18 | store_sales => "ss_sold_date", 19 | web_returns => "wr_returned_date", 20 | web_sales => "ws_sold_date", 21 | ); 22 | 23 | open(TEMP, $ARGV[0]) or die "Cannot open $ARGV[0]: $!\n"; # pass 1: find the table name 24 | $old = $/; 25 | $/ = undef; # no record separator, so the next read returns the whole file 26 | $x = <TEMP>; 27 | $x =~ m|l table (\S+)|; # captures the token after "l table ", as in "create external table NAME" 28 | $table = $1; 29 | $table =~ s/\(//g; # strip any "(" that is glued to the name 30 | 31 | if ($partitions{$table}) { 32 | $partition = " partition(" . $partitions{$table} . 
")"; 33 | } 34 | 35 | open(TEMP, $ARGV[0]) or die "Cannot open $ARGV[0]: $!\n"; # pass 2: collect column names line by line 36 | $/ = $old; 37 | while ($_ = <TEMP>) { 38 | if ($_ =~ m|^[ ,]{4,6}(\S+)|) { # first token of a line that starts with 4 to 6 spaces or commas 39 | push(@stuff, "$1"); 40 | } 41 | } 42 | $columns = join(', ', @stuff); 43 | @stuff = split('\s', $columns); # re-split so wrap() receives one "name," token per column 44 | print "ANALYZE TABLE $table$partition COMPUTE STATISTICS;\n"; 45 | 46 | $date = "1998-01-01"; 47 | my $strp = DateTime::Format::Strptime->new( 48 | pattern => '%Y-%m-%d' 49 | ); 50 | $dt = $strp->parse_datetime($date); 51 | 52 | if ($partitions{$table}) { 53 | $year = "1998"; 54 | while ($year < 2003) { # one statement per calendar day, 1998-01-01 through 2002-12-31 55 | $partitionv = "'" . $dt->strftime("%Y-%m-%d") . "'"; 56 | $partition = " partition(" . $partitions{$table} . "=$partitionv)"; 57 | print "!echo Analyzing $table $partitionv;\n"; 58 | print "ANALYZE TABLE $table$partition COMPUTE STATISTICS FOR COLUMNS \n"; 59 | print wrap("\t", "\t", @stuff); 60 | print ";\n\n"; 61 | $dt->add(days => 1); 62 | $year = $dt->strftime("%Y"); 63 | } 64 | } else { 65 | print "ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS \n"; 66 | print wrap("\t", "\t", @stuff); 67 | print ";\n\n"; 68 | } 69 | -------------------------------------------------------------------------------- /ddl-tpcds/text/household_demographics.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists household_demographics; 5 | 6 | create external table household_demographics 7 | ( 8 | hd_demo_sk int, 9 | hd_income_band_sk int, 10 | hd_buy_potential string, 11 | hd_dep_count int, 12 | hd_vehicle_count int 13 | ) 14 | row format delimited fields terminated by '|' 15 | location '${LOCATION}'; 16 | -------------------------------------------------------------------------------- /ddl-tpcds/text/income_band.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists income_band; 5 | 6 | create external table 
income_band( 7 | ib_income_band_sk int 8 | , ib_lower_bound int 9 | , ib_upper_bound int 10 | ) 11 | row format delimited fields terminated by '|' 12 | location '${LOCATION}'; 13 | -------------------------------------------------------------------------------- /ddl-tpcds/text/inventory.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists inventory; 5 | 6 | create external table inventory 7 | ( 8 | inv_date_sk int, 9 | inv_item_sk int, 10 | inv_warehouse_sk int, 11 | inv_quantity_on_hand int 12 | ) 13 | row format delimited fields terminated by '|' 14 | location '${LOCATION}'; 15 | -------------------------------------------------------------------------------- /ddl-tpcds/text/item.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists item; 5 | 6 | create external table item 7 | ( 8 | i_item_sk int, 9 | i_item_id string, 10 | i_rec_start_date string, 11 | i_rec_end_date string, 12 | i_item_desc string, 13 | i_current_price float, 14 | i_wholesale_cost float, 15 | i_brand_id int, 16 | i_brand string, 17 | i_class_id int, 18 | i_class string, 19 | i_category_id int, 20 | i_category string, 21 | i_manufact_id int, 22 | i_manufact string, 23 | i_size string, 24 | i_formulation string, 25 | i_color string, 26 | i_units string, 27 | i_container string, 28 | i_manager_id int, 29 | i_product_name string 30 | ) 31 | row format delimited fields terminated by '|' 32 | location '${LOCATION}'; 33 | -------------------------------------------------------------------------------- /ddl-tpcds/text/promotion.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists promotion; 5 | 6 | create external table promotion 7 | ( 8 | p_promo_sk int, 
9 | p_promo_id string, 10 | p_start_date_sk int, 11 | p_end_date_sk int, 12 | p_item_sk int, 13 | p_cost float, 14 | p_response_target int, 15 | p_promo_name string, 16 | p_channel_dmail string, 17 | p_channel_email string, 18 | p_channel_catalog string, 19 | p_channel_tv string, 20 | p_channel_radio string, 21 | p_channel_press string, 22 | p_channel_event string, 23 | p_channel_demo string, 24 | p_channel_details string, 25 | p_purpose string, 26 | p_discount_active string 27 | ) 28 | row format delimited fields terminated by '|' 29 | location '${LOCATION}'; 30 | -------------------------------------------------------------------------------- /ddl-tpcds/text/reason.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists reason; 5 | 6 | create external table reason( 7 | r_reason_sk int 8 | , r_reason_id string 9 | , r_reason_desc string 10 | ) 11 | row format delimited fields terminated by '|' 12 | location '${LOCATION}'; 13 | -------------------------------------------------------------------------------- /ddl-tpcds/text/ship_mode.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists ship_mode; 5 | 6 | create external table ship_mode( 7 | sm_ship_mode_sk int 8 | , sm_ship_mode_id string 9 | , sm_type string 10 | , sm_code string 11 | , sm_carrier string 12 | , sm_contract string 13 | ) 14 | row format delimited fields terminated by '|' 15 | location '${LOCATION}'; 16 | -------------------------------------------------------------------------------- /ddl-tpcds/text/store.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store; 5 | 6 | create external table store 7 | ( 8 | s_store_sk int, 9 | s_store_id string, 10 | 
s_rec_start_date string, 11 | s_rec_end_date string, 12 | s_closed_date_sk int, 13 | s_store_name string, 14 | s_number_employees int, 15 | s_floor_space int, 16 | s_hours string, 17 | s_manager string, 18 | s_market_id int, 19 | s_geography_class string, 20 | s_market_desc string, 21 | s_market_manager string, 22 | s_division_id int, 23 | s_division_name string, 24 | s_company_id int, 25 | s_company_name string, 26 | s_street_number string, 27 | s_street_name string, 28 | s_street_type string, 29 | s_suite_number string, 30 | s_city string, 31 | s_county string, 32 | s_state string, 33 | s_zip string, 34 | s_country string, 35 | s_gmt_offset float, 36 | s_tax_precentage float 37 | ) 38 | row format delimited fields terminated by '|' 39 | location '${LOCATION}'; 40 | -------------------------------------------------------------------------------- /ddl-tpcds/text/store_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store_returns; 5 | 6 | create external table store_returns 7 | ( 8 | sr_returned_date_sk int, 9 | sr_return_time_sk int, 10 | sr_item_sk int, 11 | sr_customer_sk int, 12 | sr_cdemo_sk int, 13 | sr_hdemo_sk int, 14 | sr_addr_sk int, 15 | sr_store_sk int, 16 | sr_reason_sk int, 17 | sr_ticket_number int, 18 | sr_return_quantity int, 19 | sr_return_amt float, 20 | sr_return_tax float, 21 | sr_return_amt_inc_tax float, 22 | sr_fee float, 23 | sr_return_ship_cost float, 24 | sr_refunded_cash float, 25 | sr_reversed_charge float, 26 | sr_store_credit float, 27 | sr_net_loss float 28 | ) 29 | row format delimited fields terminated by '|' 30 | location '${LOCATION}'; 31 | -------------------------------------------------------------------------------- /ddl-tpcds/text/store_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop 
table if exists store_sales; 5 | 6 | create external table store_sales 7 | ( 8 | ss_sold_date_sk int, 9 | ss_sold_time_sk int, 10 | ss_item_sk int, 11 | ss_customer_sk int, 12 | ss_cdemo_sk int, 13 | ss_hdemo_sk int, 14 | ss_addr_sk int, 15 | ss_store_sk int, 16 | ss_promo_sk int, 17 | ss_ticket_number int, 18 | ss_quantity int, 19 | ss_wholesale_cost float, 20 | ss_list_price float, 21 | ss_sales_price float, 22 | ss_ext_discount_amt float, 23 | ss_ext_sales_price float, 24 | ss_ext_wholesale_cost float, 25 | ss_ext_list_price float, 26 | ss_ext_tax float, 27 | ss_coupon_amt float, 28 | ss_net_paid float, 29 | ss_net_paid_inc_tax float, 30 | ss_net_profit float 31 | ) 32 | row format delimited fields terminated by '|' 33 | location '${LOCATION}'; 34 | -------------------------------------------------------------------------------- /ddl-tpcds/text/temp.pl: -------------------------------------------------------------------------------- 1 | use Text::Wrap; # Usage: perl temp.pl TABLE.sql; reads the DDL file named by the first argument and prints ANALYZE statements for its table 2 | 3 | use DateTime; 4 | use DateTime::Format::Strptime; 5 | 6 | $Text::Wrap::columns = 72; 7 | 8 | print "set hive.stats.dbclass=fs;\n\n"; 9 | 10 | %partitions = ( 11 | catalog_returns => "cr_returned_date", 12 | catalog_sales => "cs_sold_date", 13 | inventory => "inv_date", 14 | store_returns => "sr_returned_date", 15 | store_sales => "ss_sold_date", 16 | web_returns => "wr_returned_date", 17 | web_sales => "ws_sold_date", 18 | ); 19 | 20 | open(TEMP, $ARGV[0]) or die "Cannot open $ARGV[0]: $!\n"; # pass 1: find the table name 21 | $old = $/; 22 | $/ = undef; # no record separator, so the next read returns the whole file 23 | $x = <TEMP>; 24 | $x =~ m|l table (\S+)|; # captures the token after "l table ", as in "create external table NAME" 25 | $table = $1; 26 | $table =~ s/\(//g; # strip any "(" that is glued to the name 27 | 28 | if ($partitions{$table}) { 29 | $partition = " partition(" . $partitions{$table} . 
")"; 30 | } 31 | 32 | open(TEMP, $ARGV[0]) or die "Cannot open $ARGV[0]: $!\n"; # pass 2: collect column names line by line 33 | $/ = $old; 34 | while ($_ = <TEMP>) { 35 | if ($_ =~ m|^[ ,]{4,6}(\S+)|) { # first token of a line that starts with 4 to 6 spaces or commas 36 | push(@stuff, "$1"); 37 | } 38 | } 39 | $columns = join(', ', @stuff); 40 | @stuff = split('\s', $columns); # re-split so wrap() receives one "name," token per column 41 | print "ANALYZE TABLE $table$partition COMPUTE STATISTICS;\n"; 42 | 43 | $date = "1998-01-01"; 44 | my $strp = DateTime::Format::Strptime->new( 45 | pattern => '%Y-%m-%d' 46 | ); 47 | $dt = $strp->parse_datetime($date); 48 | 49 | if ($partitions{$table}) { 50 | $year = "1998"; 51 | while ($year < 2003) { # one statement per calendar day, 1998-01-01 through 2002-12-31 52 | $partitionv = "'" . $dt->strftime("%Y-%m-%d") . "'"; 53 | $partition = " partition(" . $partitions{$table} . "=$partitionv)"; 54 | print "ANALYZE TABLE $table$partition COMPUTE STATISTICS FOR COLUMNS \n"; 55 | print wrap("\t", "\t", @stuff); 56 | print ";\n\n"; 57 | $dt->add(days => 1); 58 | $year = $dt->strftime("%Y"); 59 | } 60 | } else { 61 | print "ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS \n"; 62 | print wrap("\t", "\t", @stuff); 63 | print ";\n\n"; 64 | } 65 | -------------------------------------------------------------------------------- /ddl-tpcds/text/time_dim.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists time_dim; 5 | 6 | create external table time_dim 7 | ( 8 | t_time_sk int, 9 | t_time_id string, 10 | t_time int, 11 | t_hour int, 12 | t_minute int, 13 | t_second int, 14 | t_am_pm string, 15 | t_shift string, 16 | t_sub_shift string, 17 | t_meal_time string 18 | ) 19 | row format delimited fields terminated by '|' 20 | location '${LOCATION}'; 21 | -------------------------------------------------------------------------------- /ddl-tpcds/text/warehouse.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists warehouse; 5 | 6 | create external table warehouse( 7 | 
w_warehouse_sk int 8 | , w_warehouse_id string 9 | , w_warehouse_name string 10 | , w_warehouse_sq_ft int 11 | , w_street_number string 12 | , w_street_name string 13 | , w_street_type string 14 | , w_suite_number string 15 | , w_city string 16 | , w_county string 17 | , w_state string 18 | , w_zip string 19 | , w_country string 20 | , w_gmt_offset float 21 | ) 22 | row format delimited fields terminated by '|' 23 | location '${LOCATION}'; 24 | -------------------------------------------------------------------------------- /ddl-tpcds/text/web_page.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_page; 5 | 6 | create external table web_page( 7 | wp_web_page_sk int 8 | , wp_web_page_id string 9 | , wp_rec_start_date string 10 | , wp_rec_end_date string 11 | , wp_creation_date_sk int 12 | , wp_access_date_sk int 13 | , wp_autogen_flag string 14 | , wp_customer_sk int 15 | , wp_url string 16 | , wp_type string 17 | , wp_char_count int 18 | , wp_link_count int 19 | , wp_image_count int 20 | , wp_max_ad_count int 21 | ) 22 | row format delimited fields terminated by '|' 23 | location '${LOCATION}'; 24 | -------------------------------------------------------------------------------- /ddl-tpcds/text/web_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_returns; 5 | 6 | create external table web_returns 7 | ( 8 | wr_returned_date_sk int, 9 | wr_returned_time_sk int, 10 | wr_item_sk int, 11 | wr_refunded_customer_sk int, 12 | wr_refunded_cdemo_sk int, 13 | wr_refunded_hdemo_sk int, 14 | wr_refunded_addr_sk int, 15 | wr_returning_customer_sk int, 16 | wr_returning_cdemo_sk int, 17 | wr_returning_hdemo_sk int, 18 | wr_returning_addr_sk int, 19 | wr_web_page_sk int, 20 | wr_reason_sk int, 21 | wr_order_number int, 22 | 
wr_return_quantity int, 23 | wr_return_amt float, 24 | wr_return_tax float, 25 | wr_return_amt_inc_tax float, 26 | wr_fee float, 27 | wr_return_ship_cost float, 28 | wr_refunded_cash float, 29 | wr_reversed_charge float, 30 | wr_account_credit float, 31 | wr_net_loss float 32 | ) 33 | row format delimited fields terminated by '|' 34 | location '${LOCATION}'; 35 | -------------------------------------------------------------------------------- /ddl-tpcds/text/web_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_sales; 5 | 6 | create external table web_sales 7 | ( 8 | ws_sold_date_sk int, 9 | ws_sold_time_sk int, 10 | ws_ship_date_sk int, 11 | ws_item_sk int, 12 | ws_bill_customer_sk int, 13 | ws_bill_cdemo_sk int, 14 | ws_bill_hdemo_sk int, 15 | ws_bill_addr_sk int, 16 | ws_ship_customer_sk int, 17 | ws_ship_cdemo_sk int, 18 | ws_ship_hdemo_sk int, 19 | ws_ship_addr_sk int, 20 | ws_web_page_sk int, 21 | ws_web_site_sk int, 22 | ws_ship_mode_sk int, 23 | ws_warehouse_sk int, 24 | ws_promo_sk int, 25 | ws_order_number int, 26 | ws_quantity int, 27 | ws_wholesale_cost float, 28 | ws_list_price float, 29 | ws_sales_price float, 30 | ws_ext_discount_amt float, 31 | ws_ext_sales_price float, 32 | ws_ext_wholesale_cost float, 33 | ws_ext_list_price float, 34 | ws_ext_tax float, 35 | ws_coupon_amt float, 36 | ws_ext_ship_cost float, 37 | ws_net_paid float, 38 | ws_net_paid_inc_tax float, 39 | ws_net_paid_inc_ship float, 40 | ws_net_paid_inc_ship_tax float, 41 | ws_net_profit float 42 | ) 43 | row format delimited fields terminated by '|' 44 | location '${LOCATION}'; 45 | -------------------------------------------------------------------------------- /ddl-tpcds/text/web_site.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists 
web_site; 5 | 6 | create external table web_site 7 | ( 8 | web_site_sk int, 9 | web_site_id string, 10 | web_rec_start_date string, 11 | web_rec_end_date string, 12 | web_name string, 13 | web_open_date_sk int, 14 | web_close_date_sk int, 15 | web_class string, 16 | web_manager string, 17 | web_mkt_id int, 18 | web_mkt_class string, 19 | web_mkt_desc string, 20 | web_market_manager string, 21 | web_company_id int, 22 | web_company_name string, 23 | web_street_number string, 24 | web_street_name string, 25 | web_street_type string, 26 | web_suite_number string, 27 | web_city string, 28 | web_county string, 29 | web_state string, 30 | web_zip string, 31 | web_country string, 32 | web_gmt_offset float, 33 | web_tax_percentage float 34 | ) 35 | row format delimited fields terminated by '|' 36 | location '${LOCATION}'; 37 | -------------------------------------------------------------------------------- /ddl-tpch/bin_flat/alltables.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists lineitem; 5 | create external table lineitem 6 | (L_ORDERKEY INT, 7 | L_PARTKEY INT, 8 | L_SUPPKEY INT, 9 | L_LINENUMBER INT, 10 | L_QUANTITY DOUBLE, 11 | L_EXTENDEDPRICE DOUBLE, 12 | L_DISCOUNT DOUBLE, 13 | L_TAX DOUBLE, 14 | L_RETURNFLAG STRING, 15 | L_LINESTATUS STRING, 16 | L_SHIPDATE STRING, 17 | L_COMMITDATE STRING, 18 | L_RECEIPTDATE STRING, 19 | L_SHIPINSTRUCT STRING, 20 | L_SHIPMODE STRING, 21 | L_COMMENT STRING) 22 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 23 | LOCATION '${LOCATION}/lineitem'; 24 | 25 | drop table if exists part; 26 | create external table part (P_PARTKEY INT, 27 | P_NAME STRING, 28 | P_MFGR STRING, 29 | P_BRAND STRING, 30 | P_TYPE STRING, 31 | P_SIZE INT, 32 | P_CONTAINER STRING, 33 | P_RETAILPRICE DOUBLE, 34 | P_COMMENT STRING) 35 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 36 | LOCATION 
'${LOCATION}/part/'; 37 | 38 | drop table if exists supplier; 39 | create external table supplier (S_SUPPKEY INT, 40 | S_NAME STRING, 41 | S_ADDRESS STRING, 42 | S_NATIONKEY INT, 43 | S_PHONE STRING, 44 | S_ACCTBAL DOUBLE, 45 | S_COMMENT STRING) 46 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 47 | LOCATION '${LOCATION}/supplier/'; 48 | 49 | drop table if exists partsupp; 50 | create external table partsupp (PS_PARTKEY INT, 51 | PS_SUPPKEY INT, 52 | PS_AVAILQTY INT, 53 | PS_SUPPLYCOST DOUBLE, 54 | PS_COMMENT STRING) 55 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 56 | LOCATION'${LOCATION}/partsupp'; 57 | 58 | drop table if exists nation; 59 | create external table nation (N_NATIONKEY INT, 60 | N_NAME STRING, 61 | N_REGIONKEY INT, 62 | N_COMMENT STRING) 63 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 64 | LOCATION '${LOCATION}/nation'; 65 | 66 | drop table if exists region; 67 | create external table region (R_REGIONKEY INT, 68 | R_NAME STRING, 69 | R_COMMENT STRING) 70 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 71 | LOCATION '${LOCATION}/region'; 72 | 73 | drop table if exists customer; 74 | create external table customer (C_CUSTKEY INT, 75 | C_NAME STRING, 76 | C_ADDRESS STRING, 77 | C_NATIONKEY INT, 78 | C_PHONE STRING, 79 | C_ACCTBAL DOUBLE, 80 | C_MKTSEGMENT STRING, 81 | C_COMMENT STRING) 82 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 83 | LOCATION '${LOCATION}/customer'; 84 | 85 | drop table if exists orders; 86 | create external table orders (O_ORDERKEY INT, 87 | O_CUSTKEY INT, 88 | O_ORDERSTATUS STRING, 89 | O_TOTALPRICE DOUBLE, 90 | O_ORDERDATE STRING, 91 | O_ORDERPRIORITY STRING, 92 | O_CLERK STRING, 93 | O_SHIPPRIORITY INT, 94 | O_COMMENT STRING) 95 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 96 | LOCATION '${LOCATION}/orders'; 97 | -------------------------------------------------------------------------------- 
/ddl-tpch/bin_flat/customer.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer; 5 | 6 | create table customer 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer; 9 | -------------------------------------------------------------------------------- /ddl-tpch/bin_flat/lineitem.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists lineitem; 5 | 6 | create table lineitem 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.lineitem; 9 | -------------------------------------------------------------------------------- /ddl-tpch/bin_flat/nation.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists nation; 5 | 6 | create table nation 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.nation; 9 | -------------------------------------------------------------------------------- /ddl-tpch/bin_flat/orders.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists orders; 5 | 6 | create table orders 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.orders; 9 | -------------------------------------------------------------------------------- /ddl-tpch/bin_flat/part.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists part; 5 | 6 | create table part 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.part; 9 | -------------------------------------------------------------------------------- /ddl-tpch/bin_flat/partsupp.sql: 
--------------------------------------------------------------------------------
1 | create database if not exists ${DB};
2 | use ${DB};
3 |
4 | drop table if exists partsupp;
5 |
6 | create table partsupp
7 | stored as ${FILE}
8 | as select * from ${SOURCE}.partsupp;
9 |
--------------------------------------------------------------------------------
/ddl-tpch/bin_flat/region.sql:
--------------------------------------------------------------------------------
1 | create database if not exists ${DB};
2 | use ${DB};
3 |
4 | drop table if exists region;
5 |
6 | create table region
7 | stored as ${FILE}
8 | as select * from ${SOURCE}.region;
9 |
--------------------------------------------------------------------------------
/ddl-tpch/bin_flat/supplier.sql:
--------------------------------------------------------------------------------
1 | create database if not exists ${DB};
2 | use ${DB};
3 |
4 | drop table if exists supplier;
5 |
6 | create table supplier
7 | stored as ${FILE}
8 | as select * from ${SOURCE}.supplier;
9 |
--------------------------------------------------------------------------------
/runSuite.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Runs every *.sql file of one sample-query suite through the hive CLI and prints a CSV summary on STDOUT.
3 | use strict;
4 | use warnings;
5 | use File::Basename;
6 |
7 | # PROTOTYPES
8 | sub dieWithUsage(;$);
9 |
10 | # GLOBALS
11 | my $SCRIPT_NAME = basename( __FILE__ );
12 | my $SCRIPT_PATH = dirname( __FILE__ );
13 | # Arguments: <suite: tpcds|tpch> [scale, defaults to 2]
14 | # MAIN
15 | dieWithUsage("one or more parameters not defined") unless @ARGV >= 1;
16 | my $suite = shift;
17 | my $scale = shift || 2;
18 | dieWithUsage("suite name required") unless $suite eq "tpcds" or $suite eq "tpch";
19 | # The query files and testbench.settings are addressed relative to the suite directory, so a failed chdir must be fatal.
20 | chdir $SCRIPT_PATH or die "${SCRIPT_NAME}:: ERROR: cannot chdir to '$SCRIPT_PATH': $!";
21 | if( $suite eq 'tpcds' ) {
22 |     chdir "sample-queries-tpcds" or die "${SCRIPT_NAME}:: ERROR: cannot chdir to 'sample-queries-tpcds': $!";
23 | } else {
24 |     chdir 'sample-queries-tpch' or die "${SCRIPT_NAME}:: ERROR: cannot chdir to 'sample-queries-tpch': $!";
25 | } # end if
26 | my @queries = glob '*.sql';
27 | # Database queried for each suite; the scale factor is part of the database name.
28 | my $db = {
29 |     'tpcds' => "tpcds_bin_partitioned_orc_$scale",
30 |     'tpch' => "tpch_flat_orc_$scale"
31 | };
32 | # CSV header; "failed" rows leave the trailing rows field empty so every row has four columns.
33 | print "filename,status,time,rows\n";
34 | for my $query ( @queries ) {
35 |     my $logname = "$query.log";
36 |     my $cmd="echo 'use $db->{${suite}}; source $query;' | hive -i testbench.settings 2>&1 | tee $logname";
37 |     # my $cmd="cat $query.log";
38 |     #print $cmd ; exit;
39 |
40 |     my $hiveStart = time();
41 |     # NOTE: the pipeline ends in tee, so $? below is tee's status, not hive's; query failures are detected from the "FAILED: " line instead.
42 |     my @hiveoutput=`$cmd`;
43 |     die "${SCRIPT_NAME}:: ERROR: hive command unexpectedly exited \$? = '$?', \$! = '$!'" if $?;
44 |     # Wall-clock duration of the whole hive invocation, in whole seconds.
45 |     my $hiveEnd = time();
46 |     my $hiveTime = $hiveEnd - $hiveStart;
47 |     foreach my $line ( @hiveoutput ) {
48 |         if( $line =~ /Time taken:\s+([\d\.]+)\s+seconds,\s+Fetched:\s+(\d+)\s+row/ ) {
49 |             print "$query,success,$hiveTime,$2\n";
50 |         } elsif(
51 |             $line =~ /^FAILED: /
52 |             # || /Task failed!/
53 |         ) {
54 |             print "$query,failed,$hiveTime,\n";
55 |         } # end if
56 |     } # end foreach
57 | } # end for
58 |
59 |
60 | sub dieWithUsage(;$) {
61 |     my $err = shift || '';
62 |     if( $err ne '' ) {
63 |         chomp $err;
64 |         $err = "ERROR: $err\n\n";
65 |     } # end if
66 |
67 | print STDERR < 500)
13 | and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
14 | and date_dim.d_qoy = 2 and date_dim.d_year = 2000
15 | and cs_sold_date between '2000-04-01' and '2000-06-30'
16 | group by ca_zip
17 | order by ca_zip
18 | limit 100;
19 |
20 |
21 |
--------------------------------------------------------------------------------
/sample-queries-tpcds/query17.sql:
--------------------------------------------------------------------------------
1 | select i_item_id
2 | ,i_item_desc
3 | ,s_state
4 | ,count(ss_quantity) as store_sales_quantitycount
5 | ,avg(ss_quantity) as store_sales_quantityave
6 | ,stddev_samp(ss_quantity) as store_sales_quantitystdev
7 | ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov
8 | ,count(sr_return_quantity) as_store_returns_quantitycount
9 | ,avg(sr_return_quantity) as_store_returns_quantityave
10 | ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev
11 |
,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov
12 | ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave
13 | ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev
14 | ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov
15 | from store_sales
16 | ,store_returns
17 | ,catalog_sales
18 | ,date_dim d1
19 | ,date_dim d2
20 | ,date_dim d3
21 | ,store
22 | ,item
23 | where d1.d_quarter_name = '2000Q1'
24 | and d1.d_date_sk = store_sales.ss_sold_date_sk
25 | and ss_sold_date between '2000-01-01' and '2000-03-31'
26 | and item.i_item_sk = store_sales.ss_item_sk
27 | and store.s_store_sk = store_sales.ss_store_sk
28 | and store_sales.ss_customer_sk = store_returns.sr_customer_sk
29 | and store_sales.ss_item_sk = store_returns.sr_item_sk
30 | and store_sales.ss_ticket_number = store_returns.sr_ticket_number
31 | and store_returns.sr_returned_date_sk = d2.d_date_sk
32 | and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3')
33 | -- Date-string bounds below must cover the same window as the d_quarter_name filters (2000Q1-2000Q3, i.e. through 2000-09-30).
34 | and sr_returned_date between '2000-01-01' and '2000-09-30'
35 | and store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk
36 | and store_returns.sr_item_sk = catalog_sales.cs_item_sk
37 | and catalog_sales.cs_sold_date_sk = d3.d_date_sk
38 | and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3')
39 | and cs_sold_date between '2000-01-01' and '2000-09-30'
40 | group by i_item_id
41 | ,i_item_desc
42 | ,s_state
43 | order by i_item_id
44 | ,i_item_desc
45 | ,s_state
46 | limit 100;
47 |
--------------------------------------------------------------------------------
/sample-queries-tpcds/query18.sql:
--------------------------------------------------------------------------------
1 | select i_item_id,
2 | ca_country,
3 | ca_state,
4 | ca_county,
5 | avg( cast(cs_quantity as decimal(12,2))) agg1,
6 | avg( cast(cs_list_price as decimal(12,2))) agg2,
7 | avg( cast(cs_coupon_amt as decimal(12,2))) agg3,
8 | avg(
cast(cs_sales_price as decimal(12,2))) agg4,
9 | avg( cast(cs_net_profit as decimal(12,2))) agg5,
10 | avg( cast(c_birth_year as decimal(12,2))) agg6,
11 | avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7
12 | from catalog_sales, customer_demographics cd1,
13 | customer_demographics cd2, customer, customer_address, date_dim, item
14 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and
15 | catalog_sales.cs_item_sk = item.i_item_sk and
16 | catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk and
17 | catalog_sales.cs_bill_customer_sk = customer.c_customer_sk and
18 | cd1.cd_gender = 'M' and
19 | cd1.cd_education_status = 'College' and
20 | customer.c_current_cdemo_sk = cd2.cd_demo_sk and
21 | customer.c_current_addr_sk = customer_address.ca_address_sk and
22 | c_birth_month in (9,5,12,4,1,10) and
23 | d_year = 2001 and
24 | cs_sold_date between '2001-01-01' and '2001-12-31' and
25 | ca_state in ('ND','WI','AL'
26 | ,'NC','OK','MS','TN')
27 | group by i_item_id, ca_country, ca_state, ca_county with rollup
28 | order by ca_country,
29 | ca_state,
30 | ca_county,
31 | i_item_id
32 | limit 100;
33 |
--------------------------------------------------------------------------------
/sample-queries-tpcds/query19.sql:
--------------------------------------------------------------------------------
1 | -- The ss_sold_date range must match d_moy=11 / d_year=1999; November ends on the 30th.
2 | select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact,
3 | sum(ss_ext_sales_price) ext_price
4 | from date_dim, store_sales, item,customer,customer_address,store
5 | where date_dim.d_date_sk = store_sales.ss_sold_date_sk
6 | and store_sales.ss_item_sk = item.i_item_sk
7 | and i_manager_id=7
8 | and d_moy=11
9 | and d_year=1999
10 | and ss_sold_date between '1999-11-01' and '1999-11-30'
11 | and store_sales.ss_customer_sk = customer.c_customer_sk
12 | and customer.c_current_addr_sk = customer_address.ca_address_sk
13 | and substr(ca_zip,1,5) <> substr(s_zip,1,5)
14 | and store_sales.ss_store_sk = store.s_store_sk
15 | group by i_brand
16 | ,i_brand_id
17 | ,i_manufact_id 18 | ,i_manufact 19 | order by ext_price desc 20 | ,i_brand 21 | ,i_brand_id 22 | ,i_manufact_id 23 | ,i_manufact 24 | limit 100 ; 25 | 26 | 27 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query20.sql: -------------------------------------------------------------------------------- 1 | select i_item_desc 2 | ,i_category 3 | ,i_class 4 | ,i_current_price 5 | ,i_item_id 6 | ,sum(cs_ext_sales_price) as itemrevenue 7 | ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from catalog_sales 10 | ,item 11 | ,date_dim 12 | where catalog_sales.cs_item_sk = item.i_item_sk 13 | and i_category in ('Jewelry', 'Sports', 'Books') 14 | and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 15 | and d_date between '2001-01-12' and '2001-02-11' 16 | and cs_sold_date between '2001-01-12' and '2001-02-11' 17 | group by i_item_id 18 | ,i_item_desc 19 | ,i_category 20 | ,i_class 21 | ,i_current_price 22 | order by i_category 23 | ,i_class 24 | ,i_item_id 25 | ,i_item_desc 26 | ,revenueratio 27 | limit 100; 28 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query21.sql: -------------------------------------------------------------------------------- 1 | select * 2 | from(select w_warehouse_name 3 | ,i_item_id 4 | ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) 5 | then inv_quantity_on_hand 6 | else 0 end) as inv_before 7 | ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) 8 | then inv_quantity_on_hand 9 | else 0 end) as inv_after 10 | from inventory 11 | ,warehouse 12 | ,item 13 | ,date_dim 14 | where i_current_price between 0.99 and 1.49 15 | and item.i_item_sk = inventory.inv_item_sk 16 | and inventory.inv_warehouse_sk = warehouse.w_warehouse_sk 17 | and inventory.inv_date_sk = date_dim.d_date_sk 18 | and d_date between '1998-03-09' and '1998-05-07' 19 | 
and inv_date between '1998-03-09' and '1998-05-07' 20 | group by w_warehouse_name, i_item_id) x 21 | where (case when inv_before > 0 22 | then inv_after / inv_before 23 | else null 24 | end) between 2.0/3.0 and 3.0/2.0 25 | order by w_warehouse_name 26 | ,i_item_id 27 | limit 100; 28 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query22.sql: -------------------------------------------------------------------------------- 1 | select i_product_name 2 | ,i_brand 3 | ,i_class 4 | ,i_category 5 | ,avg(inv_quantity_on_hand) qoh 6 | from inventory 7 | ,date_dim 8 | ,item 9 | ,warehouse 10 | where inventory.inv_date_sk=date_dim.d_date_sk 11 | and inventory.inv_item_sk=item.i_item_sk 12 | and inventory.inv_warehouse_sk = warehouse.w_warehouse_sk 13 | and date_dim.d_month_seq between 1193 and 1193 + 11 14 | and inv_date between '1999-06-01' and '2000-05-31' 15 | group by i_product_name 16 | ,i_brand 17 | ,i_class 18 | ,i_category with rollup 19 | order by qoh, i_product_name, i_brand, i_class, i_category 20 | limit 100; 21 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query24.sql: -------------------------------------------------------------------------------- 1 | with ssales as 2 | (select c_last_name 3 | ,c_first_name 4 | ,s_store_name 5 | ,ca_state 6 | ,s_state 7 | ,i_color 8 | ,i_current_price 9 | ,i_manager_id 10 | ,i_units 11 | ,i_size 12 | ,sum(ss_sales_price) netpaid 13 | from store_sales 14 | ,store_returns 15 | ,store 16 | ,item 17 | ,customer 18 | ,customer_address 19 | where ss_ticket_number = sr_ticket_number 20 | and ss_item_sk = sr_item_sk 21 | and ss_customer_sk = c_customer_sk 22 | and ss_item_sk = i_item_sk 23 | and ss_store_sk = s_store_sk 24 | and c_birth_country = upper(ca_country) 25 | and s_zip = ca_zip 26 | and s_market_id=7 27 | group by c_last_name 28 | ,c_first_name 29 | ,s_store_name 30 | ,ca_state 31 | ,s_state 32 | ,i_color 33 | 
,i_current_price 34 | ,i_manager_id 35 | ,i_units 36 | ,i_size) 37 | select c_last_name 38 | ,c_first_name 39 | ,s_store_name 40 | ,sum(netpaid) paid 41 | from ssales 42 | where i_color = 'orchid' 43 | group by c_last_name 44 | ,c_first_name 45 | ,s_store_name 46 | having sum(netpaid) > (select 0.05*avg(netpaid) 47 | from ssales) 48 | ; 49 | 50 | with ssales as 51 | (select c_last_name 52 | ,c_first_name 53 | ,s_store_name 54 | ,ca_state 55 | ,s_state 56 | ,i_color 57 | ,i_current_price 58 | ,i_manager_id 59 | ,i_units 60 | ,i_size 61 | ,sum(ss_sales_price) netpaid 62 | from store_sales 63 | ,store_returns 64 | ,store 65 | ,item 66 | ,customer 67 | ,customer_address 68 | where ss_ticket_number = sr_ticket_number 69 | and ss_item_sk = sr_item_sk 70 | and ss_customer_sk = c_customer_sk 71 | and ss_item_sk = i_item_sk 72 | and ss_store_sk = s_store_sk 73 | and c_birth_country = upper(ca_country) 74 | and s_zip = ca_zip 75 | and s_market_id = 7 76 | group by c_last_name 77 | ,c_first_name 78 | ,s_store_name 79 | ,ca_state 80 | ,s_state 81 | ,i_color 82 | ,i_current_price 83 | ,i_manager_id 84 | ,i_units 85 | ,i_size) 86 | select c_last_name 87 | ,c_first_name 88 | ,s_store_name 89 | ,sum(netpaid) paid 90 | from ssales 91 | where i_color = 'chiffon' 92 | group by c_last_name 93 | ,c_first_name 94 | ,s_store_name 95 | having sum(netpaid) > (select 0.05*avg(netpaid) 96 | from ssales) 97 | ; 98 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query25.sql: -------------------------------------------------------------------------------- 1 | select 2 | i_item_id 3 | ,i_item_desc 4 | ,s_store_id 5 | ,s_store_name 6 | ,sum(ss_net_profit) as store_sales_profit 7 | ,sum(sr_net_loss) as store_returns_loss 8 | ,sum(cs_net_profit) as catalog_sales_profit 9 | from 10 | store_sales 11 | ,store_returns 12 | ,catalog_sales 13 | ,date_dim d1 14 | ,date_dim d2 15 | ,date_dim d3 16 | ,store 17 | ,item 18 | where 19 | d1.d_moy = 4 20 | 
and d1.d_year = 1998 21 | and d1.d_date_sk = ss_sold_date_sk 22 | and i_item_sk = ss_item_sk 23 | and s_store_sk = ss_store_sk 24 | and ss_customer_sk = sr_customer_sk 25 | and ss_item_sk = sr_item_sk 26 | and ss_ticket_number = sr_ticket_number 27 | and sr_returned_date_sk = d2.d_date_sk 28 | and d2.d_moy between 4 and 10 29 | and d2.d_year = 1998 30 | and sr_customer_sk = cs_bill_customer_sk 31 | and sr_item_sk = cs_item_sk 32 | and cs_sold_date_sk = d3.d_date_sk 33 | and d3.d_moy between 4 and 10 34 | and d3.d_year = 1998 35 | and ss_sold_date between '1998-04-01' and '1998-04-30' 36 | and sr_returned_date between '1998-04-01' and '1998-10-31' 37 | and cs_sold_date between '1998-04-01' and '1998-10-31' 38 | group by 39 | i_item_id 40 | ,i_item_desc 41 | ,s_store_id 42 | ,s_store_name 43 | order by 44 | i_item_id 45 | ,i_item_desc 46 | ,s_store_id 47 | ,s_store_name 48 | limit 100; 49 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query26.sql: -------------------------------------------------------------------------------- 1 | select i_item_id, 2 | avg(cs_quantity) agg1, 3 | avg(cs_list_price) agg2, 4 | avg(cs_coupon_amt) agg3, 5 | avg(cs_sales_price) agg4 6 | from catalog_sales, customer_demographics, date_dim, item, promotion 7 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and 8 | catalog_sales.cs_item_sk = item.i_item_sk and 9 | catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk and 10 | catalog_sales.cs_promo_sk = promotion.p_promo_sk and 11 | cd_gender = 'F' and 12 | cd_marital_status = 'W' and 13 | cd_education_status = 'Primary' and 14 | (p_channel_email = 'N' or p_channel_event = 'N') and 15 | d_year = 1998 16 | and cs_sold_date between '1998-01-01' and '1998-12-31' 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100; 20 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query27.sql: 
-------------------------------------------------------------------------------- 1 | select i_item_id, 2 | s_state, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, store, item 8 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk and 9 | store_sales.ss_item_sk = item.i_item_sk and 10 | store_sales.ss_store_sk = store.s_store_sk and 11 | store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and 12 | customer_demographics.cd_gender = 'F' and 13 | customer_demographics.cd_marital_status = 'D' and 14 | customer_demographics.cd_education_status = 'Unknown' and 15 | date_dim.d_year = 1998 and 16 | ss_sold_date between '1998-01-01' and '1998-12-31' and 17 | store.s_state in ('KS','AL', 'MN', 'AL', 'SC', 'VT') 18 | group by i_item_id, s_state 19 | order by i_item_id 20 | ,s_state 21 | limit 100; 22 | 23 | 24 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query28.sql: -------------------------------------------------------------------------------- 1 | select * 2 | from (select avg(ss_list_price) B1_LP 3 | ,count(ss_list_price) B1_CNT 4 | ,count(distinct ss_list_price) B1_CNTD 5 | from store_sales 6 | where ss_quantity between 0 and 5 7 | and (ss_list_price between 11 and 11+10 8 | or ss_coupon_amt between 460 and 460+1000 9 | or ss_wholesale_cost between 14 and 14+20)) B1, 10 | (select avg(ss_list_price) B2_LP 11 | ,count(ss_list_price) B2_CNT 12 | ,count(distinct ss_list_price) B2_CNTD 13 | from store_sales 14 | where ss_quantity between 6 and 10 15 | and (ss_list_price between 91 and 91+10 16 | or ss_coupon_amt between 1430 and 1430+1000 17 | or ss_wholesale_cost between 32 and 32+20)) B2, 18 | (select avg(ss_list_price) B3_LP 19 | ,count(ss_list_price) B3_CNT 20 | ,count(distinct ss_list_price) B3_CNTD 21 | from store_sales 22 | where ss_quantity between 11 and 15 23 | and 
(ss_list_price between 66 and 66+10 24 | or ss_coupon_amt between 920 and 920+1000 25 | or ss_wholesale_cost between 4 and 4+20)) B3, 26 | (select avg(ss_list_price) B4_LP 27 | ,count(ss_list_price) B4_CNT 28 | ,count(distinct ss_list_price) B4_CNTD 29 | from store_sales 30 | where ss_quantity between 16 and 20 31 | and (ss_list_price between 142 and 142+10 32 | or ss_coupon_amt between 3054 and 3054+1000 33 | or ss_wholesale_cost between 80 and 80+20)) B4, 34 | (select avg(ss_list_price) B5_LP 35 | ,count(ss_list_price) B5_CNT 36 | ,count(distinct ss_list_price) B5_CNTD 37 | from store_sales 38 | where ss_quantity between 21 and 25 39 | and (ss_list_price between 135 and 135+10 40 | or ss_coupon_amt between 14180 and 14180+1000 41 | or ss_wholesale_cost between 38 and 38+20)) B5, 42 | (select avg(ss_list_price) B6_LP 43 | ,count(ss_list_price) B6_CNT 44 | ,count(distinct ss_list_price) B6_CNTD 45 | from store_sales 46 | where ss_quantity between 26 and 30 47 | and (ss_list_price between 28 and 28+10 48 | or ss_coupon_amt between 2513 and 2513+1000 49 | or ss_wholesale_cost between 42 and 42+20)) B6 50 | limit 100; 51 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query29.sql: -------------------------------------------------------------------------------- 1 | select 2 | i_item_id 3 | ,i_item_desc 4 | ,s_store_id 5 | ,s_store_name 6 | ,sum(ss_quantity) as store_sales_quantity 7 | ,sum(sr_return_quantity) as store_returns_quantity 8 | ,sum(cs_quantity) as catalog_sales_quantity 9 | from 10 | store_sales 11 | ,store_returns 12 | ,catalog_sales 13 | ,date_dim d1 14 | ,date_dim d2 15 | ,date_dim d3 16 | ,store 17 | ,item 18 | where 19 | d1.d_moy = 2 20 | and d1.d_year = 2000 21 | and d1.d_date_sk = ss_sold_date_sk 22 | and i_item_sk = ss_item_sk 23 | and s_store_sk = ss_store_sk 24 | and ss_customer_sk = sr_customer_sk 25 | and ss_item_sk = sr_item_sk 26 | and ss_ticket_number = sr_ticket_number 27 | and 
sr_returned_date_sk = d2.d_date_sk 28 | and d2.d_moy between 2 and 2 + 3 29 | and d2.d_year = 2000 30 | and sr_customer_sk = cs_bill_customer_sk 31 | and sr_item_sk = cs_item_sk 32 | and cs_sold_date_sk = d3.d_date_sk 33 | and d3.d_year in (2000,2000+1,2000+2) 34 | group by 35 | i_item_id 36 | ,i_item_desc 37 | ,s_store_id 38 | ,s_store_name 39 | order by 40 | i_item_id 41 | ,i_item_desc 42 | ,s_store_id 43 | ,s_store_name 44 | limit 100; 45 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query3.sql: -------------------------------------------------------------------------------- 1 | select dt.d_year 2 | ,item.i_brand_id brand_id 3 | ,item.i_brand brand 4 | ,sum(ss_ext_sales_price) sum_agg 5 | from date_dim dt 6 | ,store_sales 7 | ,item 8 | where dt.d_date_sk = store_sales.ss_sold_date_sk 9 | and store_sales.ss_item_sk = item.i_item_sk 10 | and item.i_manufact_id = 436 11 | and dt.d_moy=12 12 | and ( 13 | ( ss_sold_date between '1998-12-01' and '1998-12-31' ) or 14 | ( ss_sold_date between '1999-12-01' and '1999-12-31' ) or 15 | ( ss_sold_date between '2000-12-01' and '2000-12-31' ) or 16 | ( ss_sold_date between '2001-12-01' and '2001-12-31' ) or 17 | ( ss_sold_date between '2002-12-01' and '2002-12-31' ) 18 | ) 19 | group by dt.d_year 20 | ,item.i_brand 21 | ,item.i_brand_id 22 | order by dt.d_year 23 | ,sum_agg desc 24 | ,brand_id 25 | limit 100; 26 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query31.sql: -------------------------------------------------------------------------------- 1 | with ss as 2 | (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales 3 | from store_sales,date_dim,customer_address 4 | where ss_sold_date_sk = d_date_sk 5 | and ss_addr_sk=ca_address_sk 6 | group by ca_county,d_qoy, d_year), 7 | ws as 8 | (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales 9 | from 
web_sales,date_dim,customer_address 10 | where ws_sold_date_sk = d_date_sk 11 | and ws_bill_addr_sk=ca_address_sk 12 | group by ca_county,d_qoy, d_year) 13 | select 14 | ss1.ca_county 15 | ,ss1.d_year 16 | ,ws2.web_sales/ws1.web_sales web_q1_q2_increase 17 | ,ss2.store_sales/ss1.store_sales store_q1_q2_increase 18 | ,ws3.web_sales/ws2.web_sales web_q2_q3_increase 19 | ,ss3.store_sales/ss2.store_sales store_q2_q3_increase 20 | from 21 | ss ss1 22 | ,ss ss2 23 | ,ss ss3 24 | ,ws ws1 25 | ,ws ws2 26 | ,ws ws3 27 | where 28 | ss1.d_qoy = 1 29 | and ss1.d_year = 1998 30 | and ss1.ca_county = ss2.ca_county 31 | and ss2.d_qoy = 2 32 | and ss2.d_year = 1998 33 | and ss2.ca_county = ss3.ca_county 34 | and ss3.d_qoy = 3 35 | and ss3.d_year = 1998 36 | and ss1.ca_county = ws1.ca_county 37 | and ws1.d_qoy = 1 38 | and ws1.d_year = 1998 39 | and ws1.ca_county = ws2.ca_county 40 | and ws2.d_qoy = 2 41 | and ws2.d_year = 1998 42 | and ws1.ca_county = ws3.ca_county 43 | and ws3.d_qoy = 3 44 | and ws3.d_year =1998 45 | and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end 46 | > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end 47 | and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end 48 | > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end 49 | order by web_q1_q2_increase; 50 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query32.sql: -------------------------------------------------------------------------------- 1 | SELECT sum(cs1.cs_ext_discount_amt) as excess_discount_amount 2 | FROM (SELECT cs.cs_item_sk as cs_item_sk, 3 | cs.cs_ext_discount_amt as cs_ext_discount_amt 4 | FROM catalog_sales cs 5 | JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk) 6 | WHERE d.d_date between '2000-01-27' and '2000-04-27' 7 | and cs_sold_date between '2000-01-27' and '2000-04-27') cs1 8 | JOIN item i ON (i.i_item_sk = 
cs1.cs_item_sk) 9 | JOIN (SELECT cs2.cs_item_sk as cs_item_sk, 10 | 1.3 * avg(cs_ext_discount_amt) as avg_cs_ext_discount_amt 11 | FROM (SELECT cs.cs_item_sk as cs_item_sk, 12 | cs.cs_ext_discount_amt as cs_ext_discount_amt 13 | FROM catalog_sales cs 14 | JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk) 15 | WHERE d.d_date between '2000-01-27' and '2000-04-27' 16 | and cs_sold_date between '2000-01-27' and '2000-04-27') cs2 17 | GROUP BY cs2.cs_item_sk) tmp1 18 | ON (i.i_item_sk = tmp1.cs_item_sk) 19 | WHERE i.i_manufact_id = 436 and 20 | cs1.cs_ext_discount_amt > tmp1.avg_cs_ext_discount_amt; 21 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query39.sql: -------------------------------------------------------------------------------- 1 | select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov 2 | ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov 3 | from 4 | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 5 | ,stdev,mean, case mean when cast (0 as double) then null else stdev/mean end cov 6 | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 7 | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean 8 | from inventory 9 | JOIN item ON inventory.inv_item_sk = item.i_item_sk 10 | JOIN warehouse ON inventory.inv_warehouse_sk = warehouse.w_warehouse_sk 11 | JOIN date_dim ON inventory.inv_date_sk = date_dim.d_date_sk 12 | where 13 | d_year =2000 14 | and inv_date between '2000-01-01' and '2000-12-31' 15 | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo 16 | where case mean when cast (0 as double) then cast (0 as double) else stdev/mean end > 1) inv1 17 | JOIN 18 | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 19 | ,stdev,mean, case mean when cast (0 as double) then null else stdev/mean end cov 20 | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 21 | ,stddev_samp(inv_quantity_on_hand) 
stdev,avg(inv_quantity_on_hand) mean 22 | from inventory 23 | JOIN item ON inventory.inv_item_sk = item.i_item_sk 24 | JOIN warehouse ON inventory.inv_warehouse_sk = warehouse.w_warehouse_sk 25 | JOIN date_dim ON inventory.inv_date_sk = date_dim.d_date_sk 26 | where 27 | d_year =2000 28 | and inv_date between '2000-01-01' and '2000-12-31' 29 | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo 30 | where case mean when cast (0 as double) then cast (0 as double) else stdev/mean end > 1) inv2 31 | ON 32 | inv1.i_item_sk = inv2.i_item_sk 33 | and inv1.w_warehouse_sk = inv2.w_warehouse_sk 34 | where 35 | inv1.d_moy=1 36 | and inv2.d_moy=1+1 37 | order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov 38 | ,inv2.d_moy,inv2.mean, inv2.cov; 39 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query40.sql: -------------------------------------------------------------------------------- 1 | select 2 | w_state 3 | ,i_item_id 4 | ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) 5 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before 6 | ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) 7 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after 8 | from 9 | catalog_sales left outer join catalog_returns on 10 | (catalog_sales.cs_order_number = catalog_returns.cr_order_number 11 | and catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) 12 | ,warehouse 13 | ,item 14 | ,date_dim 15 | where 16 | i_current_price between 0.99 and 1.49 17 | and item.i_item_sk = catalog_sales.cs_item_sk 18 | and catalog_sales.cs_warehouse_sk = warehouse.w_warehouse_sk 19 | and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 20 | and date_dim.d_date between '1998-03-09' and '1998-05-08' 21 | and cs_sold_date between '1998-03-09' and '1998-05-08' 22 | group by 23 | w_state,i_item_id 24 | order by w_state,i_item_id 25 | limit 100; 
26 | 27 | 28 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query42.sql: -------------------------------------------------------------------------------- 1 | select dt.d_year 2 | ,item.i_category_id 3 | ,item.i_category 4 | ,sum(ss_ext_sales_price) as s 5 | from date_dim dt 6 | ,store_sales 7 | ,item 8 | where dt.d_date_sk = store_sales.ss_sold_date_sk 9 | and store_sales.ss_item_sk = item.i_item_sk 10 | and item.i_manager_id = 1 11 | and dt.d_moy=12 12 | and dt.d_year=1998 13 | and ss_sold_date between '1998-12-01' and '1998-12-31' 14 | group by dt.d_year 15 | ,item.i_category_id 16 | ,item.i_category 17 | order by s desc,dt.d_year 18 | ,item.i_category_id 19 | ,item.i_category 20 | limit 100 ; 21 | 22 | 23 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query43.sql: -------------------------------------------------------------------------------- 1 | 2 | select s_store_name, s_store_id, 3 | sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, 4 | sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, 5 | sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, 6 | sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, 7 | sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, 8 | sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, 9 | sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales 10 | from date_dim, store_sales, store 11 | where date_dim.d_date_sk = store_sales.ss_sold_date_sk and 12 | store.s_store_sk = store_sales.ss_store_sk and 13 | s_gmt_offset = -6 and 14 | d_year = 1998 15 | and ss_sold_date between '1998-01-01' and '1998-12-31' 16 | group by s_store_name, s_store_id 17 | order by s_store_name, 
s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales 18 | limit 100; 19 | 20 | 21 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query45.sql: -------------------------------------------------------------------------------- 1 | select ca_zip, ca_county, sum(ws_sales_price) 2 | from 3 | web_sales 4 | JOIN customer ON web_sales.ws_bill_customer_sk = customer.c_customer_sk 5 | JOIN customer_address ON customer.c_current_addr_sk = customer_address.ca_address_sk 6 | JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk 7 | JOIN item ON web_sales.ws_item_sk = item.i_item_sk 8 | where 9 | ( item.i_item_id in (select i_item_id 10 | from item i2 11 | where i2.i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 12 | ) 13 | ) 14 | and d_qoy = 2 and d_year = 2000 15 | and ws_sold_date between '2000-04-01' and '2000-06-30' 16 | group by ca_zip, ca_county 17 | order by ca_zip, ca_county 18 | limit 100; 19 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query48.sql: -------------------------------------------------------------------------------- 1 | 2 | select sum (ss_quantity) 3 | from store_sales, store, customer_demographics, customer_address, date_dim 4 | where store.s_store_sk = store_sales.ss_store_sk 5 | and store_sales.ss_sold_date_sk = date_dim.d_date_sk and d_year = 1998 6 | and ss_sold_date between '1998-01-01' and '1998-12-31' 7 | and 8 | ( 9 | ( 10 | customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk 11 | and 12 | cd_marital_status = 'M' 13 | and 14 | cd_education_status = '4 yr Degree' 15 | and 16 | ss_sales_price between 100.00 and 150.00 17 | ) 18 | or 19 | ( 20 | customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk 21 | and 22 | cd_marital_status = 'M' 23 | and 24 | cd_education_status = '4 yr Degree' 25 | and 26 | ss_sales_price between 50.00 and 100.00 27 | ) 28 | or 29 | ( 30 | 
customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk 31 | and 32 | cd_marital_status = 'M' 33 | and 34 | cd_education_status = '4 yr Degree' 35 | and 36 | ss_sales_price between 150.00 and 200.00 37 | ) 38 | ) 39 | and 40 | ( 41 | ( 42 | store_sales.ss_addr_sk = customer_address.ca_address_sk 43 | and 44 | ca_country = 'United States' 45 | and 46 | ca_state in ('KY', 'GA', 'NM') 47 | and ss_net_profit between 0 and 2000 48 | ) 49 | or 50 | (store_sales.ss_addr_sk = customer_address.ca_address_sk 51 | and 52 | ca_country = 'United States' 53 | and 54 | ca_state in ('MT', 'OR', 'IN') 55 | and ss_net_profit between 150 and 3000 56 | ) 57 | or 58 | (store_sales.ss_addr_sk = customer_address.ca_address_sk 59 | and 60 | ca_country = 'United States' 61 | and 62 | ca_state in ('WI', 'MO', 'WV') 63 | and ss_net_profit between 50 and 25000 64 | ) 65 | ) 66 | ; 67 | 68 | 69 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query50.sql: -------------------------------------------------------------------------------- 1 | 2 | select 3 | s_store_name 4 | ,s_company_id 5 | ,s_street_number 6 | ,s_street_name 7 | ,s_street_type 8 | ,s_suite_number 9 | ,s_city 10 | ,s_county 11 | ,s_state 12 | ,s_zip 13 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as 30days 14 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and 15 | (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as 3160days 16 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and 17 | (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as 6190days 18 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and 19 | (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as 91120days 20 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as 120days 21 | from 22 | store_sales 23 | ,store_returns 24 | ,store 25 | ,date_dim d1 26 | 
,date_dim d2 27 | where 28 | d2.d_year = 2000 29 | and d2.d_moy = 9 30 | and sr_returned_date between '2000-09-01' and '2000-09-30' 31 | and store_sales.ss_ticket_number = store_returns.sr_ticket_number 32 | and store_sales.ss_item_sk = store_returns.sr_item_sk 33 | and store_sales.ss_sold_date_sk = d1.d_date_sk 34 | and sr_returned_date_sk = d2.d_date_sk 35 | and store_sales.ss_customer_sk = store_returns.sr_customer_sk 36 | and store_sales.ss_store_sk = store.s_store_sk 37 | group by 38 | s_store_name 39 | ,s_company_id 40 | ,s_street_number 41 | ,s_street_name 42 | ,s_street_type 43 | ,s_suite_number 44 | ,s_city 45 | ,s_county 46 | ,s_state 47 | ,s_zip 48 | order by s_store_name 49 | ,s_company_id 50 | ,s_street_number 51 | ,s_street_name 52 | ,s_street_type 53 | ,s_suite_number 54 | ,s_city 55 | ,s_county 56 | ,s_state 57 | ,s_zip 58 | limit 100; 59 | 60 | 61 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query51.sql: -------------------------------------------------------------------------------- 1 | WITH web_v1 as ( 2 | select 3 | ws_item_sk item_sk, d_date, sum(ws_sales_price), 4 | sum(sum(ws_sales_price)) 5 | over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales 6 | from web_sales 7 | ,date_dim 8 | where ws_sold_date_sk=d_date_sk 9 | and d_month_seq between 1193 and 1193+11 10 | and ws_item_sk is not NULL 11 | group by ws_item_sk, d_date), 12 | store_v1 as ( 13 | select 14 | ss_item_sk item_sk, d_date, sum(ss_sales_price), 15 | sum(sum(ss_sales_price)) 16 | over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales 17 | from store_sales 18 | ,date_dim 19 | where ss_sold_date_sk=d_date_sk 20 | and d_month_seq between 1193 and 1193+11 21 | and ss_item_sk is not NULL 22 | group by ss_item_sk, d_date) 23 | select * 24 | from (select item_sk 25 | ,d_date 26 | ,web_sales 27 | ,store_sales 28 | 
,max(web_sales) 29 | over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative 30 | ,max(store_sales) 31 | over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative 32 | from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk 33 | ,case when web.d_date is not null then web.d_date else store.d_date end d_date 34 | ,web.cume_sales web_sales 35 | ,store.cume_sales store_sales 36 | from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk 37 | and web.d_date = store.d_date) 38 | )x )y 39 | where web_cumulative > store_cumulative 40 | order by item_sk 41 | ,d_date 42 | limit 100; 43 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query52.sql: -------------------------------------------------------------------------------- 1 | 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) ext_price 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | and ss_sold_date between '1998-12-01' and '1998-12-31' 15 | group by dt.d_year 16 | ,item.i_brand 17 | ,item.i_brand_id 18 | order by dt.d_year 19 | ,ext_price desc 20 | ,brand_id 21 | limit 100 ; 22 | 23 | 24 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query54.sql: -------------------------------------------------------------------------------- 1 | with my_customers as ( 2 | select distinct c_customer_sk 3 | , c_current_addr_sk 4 | from 5 | ( select cs_sold_date_sk sold_date_sk, 6 | cs_bill_customer_sk customer_sk, 7 | cs_item_sk item_sk 8 | from catalog_sales 9 | union all 10 | select ws_sold_date_sk sold_date_sk, 11 | 
ws_bill_customer_sk customer_sk, 12 | ws_item_sk item_sk 13 | from web_sales 14 | ) cs_or_ws_sales, 15 | item, 16 | date_dim, 17 | customer 18 | where sold_date_sk = d_date_sk 19 | and item_sk = i_item_sk 20 | and i_category = 'Jewelry' 21 | and i_class = 'football' 22 | and c_customer_sk = cs_or_ws_sales.customer_sk 23 | and d_moy = 3 24 | and d_year = 2000 25 | ) 26 | , my_revenue as ( 27 | select c_customer_sk, 28 | sum(ss_ext_sales_price) as revenue 29 | from my_customers, 30 | store_sales, 31 | customer_address, 32 | store, 33 | date_dim 34 | where c_current_addr_sk = ca_address_sk 35 | and ca_county = s_county 36 | and ca_state = s_state 37 | and ss_sold_date_sk = d_date_sk 38 | and c_customer_sk = ss_customer_sk 39 | and d_month_seq between (select distinct d_month_seq+1 40 | from date_dim where d_year = 2000 and d_moy = 3) 41 | and (select distinct d_month_seq+3 42 | from date_dim where d_year = 2000 and d_moy = 3) 43 | group by c_customer_sk 44 | ) 45 | , segments as 46 | (select cast((revenue/50) as int) as segment 47 | from my_revenue 48 | ) 49 | select segment, count(*) as num_customers, segment*50 as segment_base 50 | from segments 51 | group by segment 52 | order by segment, num_customers 53 | limit 100; 54 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query55.sql: -------------------------------------------------------------------------------- 1 | 2 | select i_brand_id brand_id, i_brand brand, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item 5 | where date_dim.d_date_sk = store_sales.ss_sold_date_sk 6 | and store_sales.ss_item_sk = item.i_item_sk 7 | and i_manager_id=36 8 | and d_moy=12 9 | and d_year=2001 10 | and ss_sold_date between '2001-12-01' and '2001-12-31' 11 | group by i_brand, i_brand_id 12 | order by ext_price desc, i_brand_id 13 | limit 100 ; 14 | 15 | 16 | -------------------------------------------------------------------------------- 
/sample-queries-tpcds/query56.sql: -------------------------------------------------------------------------------- 1 | with ss as ( 2 | select i_item_id,sum(ss_ext_sales_price) total_sales 3 | from 4 | store_sales, 5 | date_dim, 6 | customer_address, 7 | item 8 | where item.i_item_id in (select 9 | i.i_item_id 10 | from item i 11 | where i_color in ('purple','burlywood','indian')) 12 | and ss_item_sk = i_item_sk 13 | and ss_sold_date_sk = d_date_sk 14 | and d_year = 2001 15 | and d_moy = 1 16 | and ss_addr_sk = ca_address_sk 17 | and ca_gmt_offset = -6 18 | group by i_item_id), 19 | cs as ( 20 | select i_item_id,sum(cs_ext_sales_price) total_sales 21 | from 22 | catalog_sales, 23 | date_dim, 24 | customer_address, 25 | item 26 | where 27 | item.i_item_id in (select 28 | i.i_item_id 29 | from item i 30 | where i_color in ('purple','burlywood','indian')) 31 | and cs_item_sk = i_item_sk 32 | and cs_sold_date_sk = d_date_sk 33 | and d_year = 2001 34 | and d_moy = 1 35 | and cs_bill_addr_sk = ca_address_sk 36 | and ca_gmt_offset = -6 37 | group by i_item_id), 38 | ws as ( 39 | select i_item_id,sum(ws_ext_sales_price) total_sales 40 | from 41 | web_sales, 42 | date_dim, 43 | customer_address, 44 | item 45 | where 46 | item.i_item_id in (select 47 | i.i_item_id 48 | from item i 49 | where i_color in ('purple','burlywood','indian')) 50 | and ws_item_sk = i_item_sk 51 | and ws_sold_date_sk = d_date_sk 52 | and d_year = 2001 53 | and d_moy = 1 54 | and ws_bill_addr_sk = ca_address_sk 55 | and ca_gmt_offset = -6 56 | group by i_item_id) 57 | select i_item_id ,sum(total_sales) total_sales 58 | from (select * from ss 59 | union all 60 | select * from cs 61 | union all 62 | select * from ws) tmp1 63 | group by i_item_id 64 | order by total_sales 65 | limit 100; 66 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query58.sql: -------------------------------------------------------------------------------- 1 | select 
ss_items.item_id 2 | ,ss_item_rev 3 | ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev -- each *_dev is that channel's revenue as a percentage of the three-channel average 4 | ,cs_item_rev 5 | ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev 6 | ,ws_item_rev 7 | ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev 8 | ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average 9 | FROM 10 | ( select i_item_id item_id ,sum(ss_ext_sales_price) as ss_item_rev 11 | from store_sales 12 | JOIN item ON store_sales.ss_item_sk = item.i_item_sk 13 | JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk 14 | JOIN (select d1.d_date 15 | from date_dim d1 JOIN date_dim d2 ON d1.d_week_seq = d2.d_week_seq 16 | where d2.d_date = '1998-08-04') sub ON date_dim.d_date = sub.d_date 17 | where ss_sold_date between '1998-08-04' and '1998-08-10' 18 | group by i_item_id ) ss_items 19 | JOIN 20 | ( select i_item_id item_id ,sum(cs_ext_sales_price) as cs_item_rev 21 | from catalog_sales 22 | JOIN item ON catalog_sales.cs_item_sk = item.i_item_sk 23 | JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 24 | JOIN (select d1.d_date 25 | from date_dim d1 JOIN date_dim d2 ON d1.d_week_seq = d2.d_week_seq 26 | where d2.d_date = '1998-08-04') sub ON date_dim.d_date = sub.d_date 27 | where cs_sold_date between '1998-08-04' and '1998-08-10' 28 | group by i_item_id ) cs_items 29 | ON ss_items.item_id=cs_items.item_id 30 | JOIN 31 | ( select i_item_id item_id ,sum(ws_ext_sales_price) as ws_item_rev 32 | from web_sales 33 | JOIN item ON web_sales.ws_item_sk = item.i_item_sk 34 | JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk 35 | JOIN (select d1.d_date 36 | from date_dim d1 JOIN date_dim d2 ON d1.d_week_seq = d2.d_week_seq 37 | where d2.d_date = '1998-08-04') sub ON date_dim.d_date = sub.d_date 38 | where ws_sold_date between '1998-08-04' and '1998-08-10' 39 | group by i_item_id ) ws_items 40 | ON ss_items.item_id=ws_items.item_id 41 | where 42 | ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev 
43 | and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev 44 | and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev 45 | and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev 46 | and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev 47 | and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev 48 | order by item_id ,ss_item_rev 49 | limit 100; 50 | 51 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query60.sql: -------------------------------------------------------------------------------- 1 | with ss as ( 2 | select 3 | i_item_id,sum(ss_ext_sales_price) total_sales 4 | from 5 | store_sales, 6 | date_dim, 7 | customer_address, 8 | item 9 | where 10 | item.i_item_id in (select 11 | i.i_item_id 12 | from 13 | item i 14 | where i_category in ('Children')) 15 | and ss_item_sk = i_item_sk 16 | and ss_sold_date_sk = d_date_sk 17 | and d_year = 1999 18 | and d_moy = 9 19 | and ss_addr_sk = ca_address_sk 20 | and ca_gmt_offset = -6 21 | group by i_item_id), 22 | cs as ( 23 | select 24 | i_item_id,sum(cs_ext_sales_price) total_sales 25 | from 26 | catalog_sales, 27 | date_dim, 28 | customer_address, 29 | item 30 | where 31 | item.i_item_id in (select 32 | i.i_item_id 33 | from 34 | item i 35 | where i_category in ('Children')) 36 | and cs_item_sk = i_item_sk 37 | and cs_sold_date_sk = d_date_sk 38 | and d_year = 1999 39 | and d_moy = 9 40 | and cs_bill_addr_sk = ca_address_sk 41 | and ca_gmt_offset = -6 42 | group by i_item_id), 43 | ws as ( 44 | select 45 | i_item_id,sum(ws_ext_sales_price) total_sales 46 | from 47 | web_sales, 48 | date_dim, 49 | customer_address, 50 | item 51 | where 52 | item.i_item_id in (select 53 | i.i_item_id 54 | from 55 | item i 56 | where i_category in ('Children')) 57 | and ws_item_sk = i_item_sk 58 | and ws_sold_date_sk = d_date_sk 59 | and d_year = 1999 60 | and d_moy = 9 61 | and ws_bill_addr_sk = ca_address_sk 62 | and ca_gmt_offset = 
-6 63 | group by i_item_id) 64 | select 65 | i_item_id 66 | ,sum(total_sales) total_sales 67 | from (select * from ss 68 | union all 69 | select * from cs 70 | union all 71 | select * from ws) tmp1 72 | group by i_item_id 73 | order by i_item_id 74 | ,total_sales 75 | limit 100; 76 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query67.sql: -------------------------------------------------------------------------------- 1 | 2 | select * 3 | from (select i_category 4 | ,i_class 5 | ,i_brand 6 | ,i_product_name 7 | ,d_year 8 | ,d_qoy 9 | ,d_moy 10 | ,s_store_id 11 | ,sumsales 12 | ,rank() over (partition by i_category order by sumsales desc) rk 13 | from (select i_category 14 | ,i_class 15 | ,i_brand 16 | ,i_product_name 17 | ,d_year 18 | ,d_qoy 19 | ,d_moy 20 | ,s_store_id 21 | ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales 22 | from store_sales 23 | ,date_dim 24 | ,store 25 | ,item 26 | where store_sales.ss_sold_date_sk=date_dim.d_date_sk 27 | and store_sales.ss_item_sk=item.i_item_sk 28 | and store_sales.ss_store_sk = store.s_store_sk 29 | and d_month_seq between 1193 and 1193+11 30 | and ss_sold_date between '1999-06-01' and '2000-05-31' 31 | group by i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id with rollup)dw1) dw2 32 | where rk <= 100 33 | order by i_category 34 | ,i_class 35 | ,i_brand 36 | ,i_product_name 37 | ,d_year 38 | ,d_qoy 39 | ,d_moy 40 | ,s_store_id 41 | ,sumsales 42 | ,rk 43 | limit 100; 44 | 45 | 46 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query68.sql: -------------------------------------------------------------------------------- 1 | 2 | select c_last_name 3 | ,c_first_name 4 | ,ca_city 5 | ,bought_city 6 | ,ss_ticket_number 7 | ,extended_price 8 | ,extended_tax 9 | ,list_price 10 | from (select ss_ticket_number 11 | ,ss_customer_sk 12 | ,ca_city bought_city 13 | ,sum(ss_ext_sales_price) 
extended_price 14 | ,sum(ss_ext_list_price) list_price 15 | ,sum(ss_ext_tax) extended_tax 16 | from store_sales 17 | ,date_dim 18 | ,store 19 | ,household_demographics 20 | ,customer_address 21 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 22 | and store_sales.ss_store_sk = store.s_store_sk 23 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 24 | and store_sales.ss_addr_sk = customer_address.ca_address_sk 25 | and date_dim.d_dom between 1 and 2 26 | and (household_demographics.hd_dep_count = 4 or 27 | household_demographics.hd_vehicle_count= 2) 28 | and date_dim.d_year in (1998,1998+1,1998+2) 29 | and ss_sold_date in ( 30 | '1998-01-01', '1998-01-02', '1998-02-01', '1998-02-02', '1998-03-01', '1998-03-02', 31 | '1998-04-01', '1998-04-02', '1998-05-01', '1998-05-02', '1998-06-01', '1998-06-02', 32 | '1998-07-01', '1998-07-02', '1998-08-01', '1998-08-02', '1998-09-01', '1998-09-02', 33 | '1998-10-01', '1998-10-02', '1998-11-01', '1998-11-02', '1998-12-01', '1998-12-02', 34 | '1999-01-01', '1999-01-02', '1999-02-01', '1999-02-02', '1999-03-01', '1999-03-02', 35 | '1999-04-01', '1999-04-02', '1999-05-01', '1999-05-02', '1999-06-01', '1999-06-02', 36 | '1999-07-01', '1999-07-02', '1999-08-01', '1999-08-02', '1999-09-01', '1999-09-02', 37 | '1999-10-01', '1999-10-02', '1999-11-01', '1999-11-02', '1999-12-01', '1999-12-02', 38 | '2000-01-01', '2000-01-02', '2000-02-01', '2000-02-02', '2000-03-01', '2000-03-02', 39 | '2000-04-01', '2000-04-02', '2000-05-01', '2000-05-02', '2000-06-01', '2000-06-02', 40 | '2000-07-01', '2000-07-02', '2000-08-01', '2000-08-02', '2000-09-01', '2000-09-02', 41 | '2000-10-01', '2000-10-02', '2000-11-01', '2000-11-02', '2000-12-01', '2000-12-02' 42 | ) 43 | and store.s_city in ('Rosedale','Bethlehem') 44 | group by ss_ticket_number 45 | ,ss_customer_sk 46 | ,ss_addr_sk,ca_city) dn 47 | ,customer 48 | ,customer_address current_addr 49 | where dn.ss_customer_sk = customer.c_customer_sk 50 | and 
customer.c_current_addr_sk = current_addr.ca_address_sk 51 | and current_addr.ca_city <> bought_city 52 | order by c_last_name 53 | ,ss_ticket_number 54 | limit 100; 55 | 56 | 57 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query7.sql: -------------------------------------------------------------------------------- 1 | 2 | select i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, item, promotion 8 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk and 9 | store_sales.ss_item_sk = item.i_item_sk and 10 | store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and 11 | store_sales.ss_promo_sk = promotion.p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | and ss_sold_date between '1998-01-01' and '1998-12-31' 18 | group by i_item_id 19 | order by i_item_id 20 | limit 100; 21 | 22 | 23 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query70.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(ss_net_profit) as total_sum 3 | ,s_state 4 | ,s_county 5 | ,grouping__id as lochierarchy 6 | , rank() over(partition by grouping__id, case when grouping__id == 2 then s_state end order by sum(ss_net_profit)) as rank_within_parent 7 | from 8 | store_sales ss join date_dim d1 on d1.d_date_sk = ss.ss_sold_date_sk 9 | join store s on s.s_store_sk = ss.ss_store_sk 10 | where 11 | d1.d_month_seq between 1193 and 1193+11 12 | and ss_sold_date between '1999-06-01' and '2000-05-31' 13 | and s.s_state in 14 | ( select s_state 15 | from (select s_state as s_state, sum(ss_net_profit), 16 | rank() over ( partition by s_state order by 
sum(ss_net_profit) desc) as ranking 17 | from store_sales, store, date_dim 18 | where d_month_seq between 1193 and 1193+11 19 | and date_dim.d_date_sk = store_sales.ss_sold_date_sk 20 | and store.s_store_sk = store_sales.ss_store_sk 21 | and ss_sold_date between '1999-06-01' and '2000-05-31' 22 | group by s_state 23 | ) tmp1 24 | where ranking <= 5 25 | ) 26 | group by s_state,s_county with rollup 27 | order by 28 | lochierarchy desc 29 | ,case when lochierarchy = 0 then s_state end 30 | ,rank_within_parent 31 | limit 100; 32 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query71.sql: -------------------------------------------------------------------------------- 1 | select i_brand_id brand_id, i_brand brand,t_hour,t_minute, 2 | sum(ext_price) ext_price 3 | from item JOIN (select ws_ext_sales_price as ext_price, 4 | ws_sold_date_sk as sold_date_sk, 5 | ws_item_sk as sold_item_sk, 6 | ws_sold_time_sk as time_sk 7 | from web_sales,date_dim 8 | where date_dim.d_date_sk = web_sales.ws_sold_date_sk 9 | and d_moy=12 10 | and d_year=2001 11 | and ws_sold_date between '2001-12-01' and '2001-12-31' 12 | union all 13 | select cs_ext_sales_price as ext_price, 14 | cs_sold_date_sk as sold_date_sk, 15 | cs_item_sk as sold_item_sk, 16 | cs_sold_time_sk as time_sk 17 | from catalog_sales,date_dim 18 | where date_dim.d_date_sk = catalog_sales.cs_sold_date_sk 19 | and d_moy=12 20 | and d_year=2001 21 | and cs_sold_date between '2001-12-01' and '2001-12-31' 22 | union all 23 | select ss_ext_sales_price as ext_price, 24 | ss_sold_date_sk as sold_date_sk, 25 | ss_item_sk as sold_item_sk, 26 | ss_sold_time_sk as time_sk 27 | from store_sales,date_dim 28 | where date_dim.d_date_sk = store_sales.ss_sold_date_sk 29 | and d_moy=12 30 | and d_year=2001 31 | and ss_sold_date between '2001-12-01' and '2001-12-31' 32 | ) tmp ON tmp.sold_item_sk = item.i_item_sk 33 | JOIN time_dim ON tmp.time_sk = time_dim.t_time_sk 34 | where 35 | 
i_manager_id=1 36 | and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') 37 | group by i_brand, i_brand_id,t_hour,t_minute 38 | order by ext_price desc, i_brand_id 39 | ; 40 | 41 | 42 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query72.sql: -------------------------------------------------------------------------------- 1 | -- no_promo / promo use sum() over a 1/0 flag: count() would count the 0s too and always equal total_cnt 2 | select i_item_desc 3 | ,w_warehouse_name 4 | ,d1.d_week_seq 5 | ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo 6 | ,sum(case when p_promo_sk is not null then 1 else 0 end) promo 7 | ,count(*) total_cnt 8 | from catalog_sales 9 | join inventory on (catalog_sales.cs_item_sk = inventory.inv_item_sk) 10 | join warehouse on (warehouse.w_warehouse_sk=inventory.inv_warehouse_sk) 11 | join item on (item.i_item_sk = catalog_sales.cs_item_sk) 12 | join customer_demographics on (catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk) 13 | join household_demographics on (catalog_sales.cs_bill_hdemo_sk = household_demographics.hd_demo_sk) 14 | join date_dim d1 on (catalog_sales.cs_sold_date_sk = d1.d_date_sk) 15 | join date_dim d2 on (inventory.inv_date_sk = d2.d_date_sk) 16 | join date_dim d3 on (catalog_sales.cs_ship_date_sk = d3.d_date_sk) 17 | left outer join promotion on (catalog_sales.cs_promo_sk=promotion.p_promo_sk) 18 | left outer join catalog_returns on (catalog_returns.cr_item_sk = catalog_sales.cs_item_sk and catalog_returns.cr_order_number = catalog_sales.cs_order_number) 19 | where d1.d_week_seq = d2.d_week_seq 20 | and inv_quantity_on_hand < cs_quantity 21 | and d3.d_date > d1.d_date + 5 22 | and hd_buy_potential = '1001-5000' 23 | and d1.d_year = 2001 24 | and hd_buy_potential = '1001-5000' 25 | and cd_marital_status = 'M' 26 | and d1.d_year = 2001 27 | and inv_date between '2001-01-01' and '2001-12-31' 28 | and cs_sold_date between '2001-01-01' and '2002-12-31' 29 | group by i_item_desc,w_warehouse_name,d1.d_week_seq 30 | order by total_cnt 
desc, i_item_desc, w_warehouse_name, d_week_seq 31 | limit 100; 32 | 33 | 34 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query73.sql: -------------------------------------------------------------------------------- 1 | 2 | select c_last_name 3 | ,c_first_name 4 | ,c_salutation 5 | ,c_preferred_cust_flag 6 | ,ss_ticket_number 7 | ,cnt from 8 | (select ss_ticket_number 9 | ,ss_customer_sk 10 | ,count(*) cnt 11 | from store_sales,date_dim,store,household_demographics 12 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 13 | and store_sales.ss_store_sk = store.s_store_sk 14 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 15 | and date_dim.d_dom between 1 and 2 16 | and (household_demographics.hd_buy_potential = '1001-5000' or 17 | household_demographics.hd_buy_potential = '5001-10000') 18 | and household_demographics.hd_vehicle_count > 0 19 | and case when household_demographics.hd_vehicle_count > 0 then 20 | household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 21 | and date_dim.d_year in (1998,1998+1,1998+2) 22 | and ss_sold_date in ( 23 | '1998-01-01', '1998-01-02', '1998-02-01', '1998-02-02', '1998-03-01', '1998-03-02', 24 | '1998-04-01', '1998-04-02', '1998-05-01', '1998-05-02', '1998-06-01', '1998-06-02', 25 | '1998-07-01', '1998-07-02', '1998-08-01', '1998-08-02', '1998-09-01', '1998-09-02', 26 | '1998-10-01', '1998-10-02', '1998-11-01', '1998-11-02', '1998-12-01', '1998-12-02', 27 | '1999-01-01', '1999-01-02', '1999-02-01', '1999-02-02', '1999-03-01', '1999-03-02', 28 | '1999-04-01', '1999-04-02', '1999-05-01', '1999-05-02', '1999-06-01', '1999-06-02', 29 | '1999-07-01', '1999-07-02', '1999-08-01', '1999-08-02', '1999-09-01', '1999-09-02', 30 | '1999-10-01', '1999-10-02', '1999-11-01', '1999-11-02', '1999-12-01', '1999-12-02', 31 | '2000-01-01', '2000-01-02', '2000-02-01', '2000-02-02', '2000-03-01', '2000-03-02', 32 | '2000-04-01', 
'2000-04-02', '2000-05-01', '2000-05-02', '2000-06-01', '2000-06-02', 33 | '2000-07-01', '2000-07-02', '2000-08-01', '2000-08-02', '2000-09-01', '2000-09-02', 34 | '2000-10-01', '2000-10-02', '2000-11-01', '2000-11-02', '2000-12-01', '2000-12-02' 35 | ) 36 | and store.s_county in ('Kittitas County','Adams County','Richland County','Furnas County') 37 | group by ss_ticket_number,ss_customer_sk) dj,customer 38 | where dj.ss_customer_sk = customer.c_customer_sk 39 | and cnt between 1 and 5 40 | order by cnt desc; 41 | 42 | 43 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query75.sql: -------------------------------------------------------------------------------- 1 | WITH all_sales AS ( 2 | SELECT d_year 3 | ,i_brand_id 4 | ,i_class_id 5 | ,i_category_id 6 | ,i_manufact_id 7 | ,SUM(sales_cnt) AS sales_cnt 8 | ,SUM(sales_amt) AS sales_amt 9 | FROM (SELECT d_year 10 | ,i_brand_id 11 | ,i_class_id 12 | ,i_category_id 13 | ,i_manufact_id 14 | ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt 15 | ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt 16 | FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk 17 | JOIN date_dim ON d_date_sk=cs_sold_date_sk 18 | LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number 19 | AND cs_item_sk=cr_item_sk) 20 | WHERE i_category='Sports' 21 | UNION ALL 22 | SELECT d_year 23 | ,i_brand_id 24 | ,i_class_id 25 | ,i_category_id 26 | ,i_manufact_id 27 | ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt 28 | ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt 29 | FROM store_sales JOIN item ON i_item_sk=ss_item_sk 30 | JOIN date_dim ON d_date_sk=ss_sold_date_sk 31 | LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number 32 | AND ss_item_sk=sr_item_sk) 33 | WHERE i_category='Sports' 34 | UNION ALL 35 | SELECT d_year 36 | ,i_brand_id 37 | ,i_class_id 38 | ,i_category_id 39 | ,i_manufact_id 40 | ,ws_quantity - 
COALESCE(wr_return_quantity,0) AS sales_cnt 41 | ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt 42 | FROM web_sales JOIN item ON i_item_sk=ws_item_sk 43 | JOIN date_dim ON d_date_sk=ws_sold_date_sk 44 | LEFT JOIN web_returns ON (ws_order_number=wr_order_number 45 | AND ws_item_sk=wr_item_sk) 46 | WHERE i_category='Sports') sales_detail 47 | GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) 48 | SELECT prev_yr.d_year AS prev_year 49 | ,curr_yr.d_year AS year 50 | ,curr_yr.i_brand_id 51 | ,curr_yr.i_class_id 52 | ,curr_yr.i_category_id 53 | ,curr_yr.i_manufact_id 54 | ,prev_yr.sales_cnt AS prev_yr_cnt 55 | ,curr_yr.sales_cnt AS curr_yr_cnt 56 | ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff 57 | ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff 58 | FROM all_sales curr_yr, all_sales prev_yr 59 | WHERE curr_yr.i_brand_id=prev_yr.i_brand_id 60 | AND curr_yr.i_class_id=prev_yr.i_class_id 61 | AND curr_yr.i_category_id=prev_yr.i_category_id 62 | AND curr_yr.i_manufact_id=prev_yr.i_manufact_id 63 | AND curr_yr.d_year=2002 64 | AND prev_yr.d_year=2002-1 65 | AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 66 | ORDER BY sales_cnt_diff 67 | limit 100; 68 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query76.sql: -------------------------------------------------------------------------------- 1 | 2 | select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( 3 | SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price 4 | FROM store_sales, item, date_dim 5 | WHERE ss_addr_sk IS NULL 6 | AND store_sales.ss_sold_date_sk=date_dim.d_date_sk 7 | AND store_sales.ss_item_sk=item.i_item_sk 8 | UNION ALL 9 | SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price 10 | FROM 
web_sales, item, date_dim 11 | WHERE ws_web_page_sk IS NULL 12 | AND web_sales.ws_sold_date_sk=date_dim.d_date_sk 13 | AND web_sales.ws_item_sk=item.i_item_sk 14 | UNION ALL 15 | SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price 16 | FROM catalog_sales, item, date_dim 17 | WHERE cs_warehouse_sk IS NULL 18 | AND catalog_sales.cs_sold_date_sk=date_dim.d_date_sk 19 | AND catalog_sales.cs_item_sk=item.i_item_sk) foo 20 | GROUP BY channel, col_name, d_year, d_qoy, i_category 21 | ORDER BY channel, col_name, d_year, d_qoy, i_category 22 | limit 100; 23 | 24 | 25 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query80.sql: -------------------------------------------------------------------------------- 1 | with ssr as 2 | (select s_store_id as store_id, 3 | sum(ss_ext_sales_price) as sales, 4 | sum(coalesce(sr_return_amt, 0)) as returns, 5 | sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit 6 | from store_sales left outer join store_returns on 7 | (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), 8 | date_dim, 9 | store, 10 | item, 11 | promotion 12 | where ss_sold_date_sk = d_date_sk 13 | and d_date between cast('1998-08-04' as date) 14 | and (cast('1998-09-04' as date)) 15 | and ss_store_sk = s_store_sk 16 | and ss_item_sk = i_item_sk 17 | and i_current_price > 50 18 | and ss_promo_sk = p_promo_sk 19 | and p_channel_tv = 'N' 20 | group by s_store_id) 21 | , 22 | csr as 23 | (select cp_catalog_page_id as catalog_page_id, 24 | sum(cs_ext_sales_price) as sales, 25 | sum(coalesce(cr_return_amount, 0)) as returns, 26 | sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit 27 | from catalog_sales left outer join catalog_returns on 28 | (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), 29 | date_dim, 30 | catalog_page, 31 | item, 32 | promotion 33 | where cs_sold_date_sk = d_date_sk 34 | and d_date between 
cast('1998-08-04' as date) 35 | and (cast('1998-09-04' as date)) 36 | and cs_catalog_page_sk = cp_catalog_page_sk 37 | and cs_item_sk = i_item_sk 38 | and i_current_price > 50 39 | and cs_promo_sk = p_promo_sk 40 | and p_channel_tv = 'N' 41 | group by cp_catalog_page_id) 42 | , 43 | wsr as 44 | (select web_site_id, 45 | sum(ws_ext_sales_price) as sales, 46 | sum(coalesce(wr_return_amt, 0)) as returns, 47 | sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit 48 | from web_sales left outer join web_returns on 49 | (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), 50 | date_dim, 51 | web_site, 52 | item, 53 | promotion 54 | where ws_sold_date_sk = d_date_sk 55 | and d_date between cast('1998-08-04' as date) 56 | and (cast('1998-09-04' as date)) 57 | and ws_web_site_sk = web_site_sk 58 | and ws_item_sk = i_item_sk 59 | and i_current_price > 50 60 | and ws_promo_sk = p_promo_sk 61 | and p_channel_tv = 'N' 62 | group by web_site_id) 63 | select channel 64 | , id 65 | , sum(sales) as sales 66 | , sum(returns) as returns 67 | , sum(profit) as profit 68 | from 69 | (select 'store channel' as channel 70 | , concat('store', store_id) as id 71 | , sales 72 | , returns 73 | , profit 74 | from ssr 75 | union all 76 | select 'catalog channel' as channel 77 | , concat('catalog_page', catalog_page_id) as id 78 | , sales 79 | , returns 80 | , profit 81 | from csr 82 | union all 83 | select 'web channel' as channel 84 | , concat('web_site', web_site_id) as id 85 | , sales 86 | , returns 87 | , profit 88 | from wsr 89 | ) x 90 | group by channel, id with rollup 91 | order by channel 92 | ,id 93 | limit 100; 94 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query82.sql: -------------------------------------------------------------------------------- 1 | select i_item_id 2 | ,i_item_desc 3 | ,i_current_price 4 | from item, inventory, date_dim, store_sales 5 | where i_current_price between 30 and 30+30 6 | and 
inv_item_sk = i_item_sk 7 | and d_date_sk=inv_date_sk 8 | and d_date between '2002-05-30' and '2002-07-30' 9 | and i_manufact_id in (437,129,727,663) 10 | and inv_quantity_on_hand between 100 and 500 11 | and ss_item_sk = i_item_sk 12 | group by i_item_id,i_item_desc,i_current_price 13 | order by i_item_id 14 | limit 100; 15 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query83.sql: -------------------------------------------------------------------------------- 1 | with sr_items as 2 | (select i_item_id item_id, 3 | sum(sr_return_quantity) sr_item_qty 4 | from store_returns, 5 | item, 6 | date_dim 7 | where sr_item_sk = i_item_sk 8 | and d_date in 9 | (select d_date 10 | from date_dim 11 | where d_week_seq in 12 | (select d_week_seq 13 | from date_dim 14 | where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) 15 | and sr_returned_date_sk = d_date_sk 16 | group by i_item_id), 17 | cr_items as 18 | (select i_item_id item_id, 19 | sum(cr_return_quantity) cr_item_qty 20 | from catalog_returns, 21 | item, 22 | date_dim 23 | where cr_item_sk = i_item_sk 24 | and d_date in 25 | (select d_date 26 | from date_dim 27 | where d_week_seq in 28 | (select d_week_seq 29 | from date_dim 30 | where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) 31 | and cr_returned_date_sk = d_date_sk 32 | group by i_item_id), 33 | wr_items as 34 | (select i_item_id item_id, 35 | sum(wr_return_quantity) wr_item_qty 36 | from web_returns, 37 | item, 38 | date_dim 39 | where wr_item_sk = i_item_sk 40 | and d_date in 41 | (select d_date 42 | from date_dim 43 | where d_week_seq in 44 | (select d_week_seq 45 | from date_dim 46 | where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) 47 | and wr_returned_date_sk = d_date_sk 48 | group by i_item_id) 49 | select sr_items.item_id 50 | ,sr_item_qty 51 | ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev 52 | ,cr_item_qty 53 | 
,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev 54 | ,wr_item_qty 55 | ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev 56 | ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average 57 | from sr_items 58 | ,cr_items 59 | ,wr_items 60 | where sr_items.item_id=cr_items.item_id 61 | and sr_items.item_id=wr_items.item_id 62 | order by sr_items.item_id 63 | ,sr_item_qty 64 | limit 100; 65 | 66 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query84.sql: -------------------------------------------------------------------------------- 1 | 2 | select c_customer_id as customer_id 3 | ,concat(c_last_name, ', ', c_first_name) as customername 4 | from customer 5 | ,customer_address 6 | ,customer_demographics 7 | ,household_demographics 8 | ,income_band 9 | ,store_returns 10 | where ca_city = 'Hopewell' 11 | and customer.c_current_addr_sk = customer_address.ca_address_sk 12 | and ib_lower_bound >= 32287 13 | and ib_upper_bound <= 32287 + 50000 14 | and income_band.ib_income_band_sk = household_demographics.hd_income_band_sk 15 | and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk 16 | and household_demographics.hd_demo_sk = customer.c_current_hdemo_sk 17 | and store_returns.sr_cdemo_sk = customer_demographics.cd_demo_sk 18 | order by customer_id 19 | limit 100; 20 | 21 | 22 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query85.sql: -------------------------------------------------------------------------------- 1 | 2 | select substr(r_reason_desc,1,20) as r 3 | ,avg(ws_quantity) wq 4 | ,avg(wr_refunded_cash) ref 5 | ,avg(wr_fee) fee 6 | from web_sales, web_returns, web_page, customer_demographics cd1, 7 | customer_demographics cd2, customer_address, date_dim, reason 8 | where web_sales.ws_web_page_sk = web_page.wp_web_page_sk 9 | and web_sales.ws_item_sk = web_returns.wr_item_sk 10 | and web_sales.ws_order_number = 
web_returns.wr_order_number 11 | and web_sales.ws_sold_date_sk = date_dim.d_date_sk and d_year = 1998 12 | and web_sales.ws_sold_date between '1998-01-01' and '1998-12-31' 13 | and cd1.cd_demo_sk = web_returns.wr_refunded_cdemo_sk 14 | and cd2.cd_demo_sk = web_returns.wr_returning_cdemo_sk 15 | and customer_address.ca_address_sk = web_returns.wr_refunded_addr_sk 16 | and reason.r_reason_sk = web_returns.wr_reason_sk 17 | and 18 | ( 19 | ( 20 | cd1.cd_marital_status = 'M' 21 | and 22 | cd1.cd_marital_status = cd2.cd_marital_status 23 | and 24 | cd1.cd_education_status = '4 yr Degree' 25 | and 26 | cd1.cd_education_status = cd2.cd_education_status 27 | and 28 | ws_sales_price between 100.00 and 150.00 29 | ) 30 | or 31 | ( 32 | cd1.cd_marital_status = 'D' 33 | and 34 | cd1.cd_marital_status = cd2.cd_marital_status 35 | and 36 | cd1.cd_education_status = 'Primary' 37 | and 38 | cd1.cd_education_status = cd2.cd_education_status 39 | and 40 | ws_sales_price between 50.00 and 100.00 41 | ) 42 | or 43 | ( 44 | cd1.cd_marital_status = 'U' 45 | and 46 | cd1.cd_marital_status = cd2.cd_marital_status 47 | and 48 | cd1.cd_education_status = 'Advanced Degree' 49 | and 50 | cd1.cd_education_status = cd2.cd_education_status 51 | and 52 | ws_sales_price between 150.00 and 200.00 53 | ) 54 | ) 55 | and 56 | ( 57 | ( 58 | ca_country = 'United States' 59 | and 60 | ca_state in ('KY', 'GA', 'NM') 61 | and ws_net_profit between 100 and 200 62 | ) 63 | or 64 | ( 65 | ca_country = 'United States' 66 | and 67 | ca_state in ('MT', 'OR', 'IN') 68 | and ws_net_profit between 150 and 300 69 | ) 70 | or 71 | ( 72 | ca_country = 'United States' 73 | and 74 | ca_state in ('WI', 'MO', 'WV') 75 | and ws_net_profit between 50 and 250 76 | ) 77 | ) 78 | group by r_reason_desc 79 | order by r, wq, ref, fee 80 | limit 100; 81 | 82 | 83 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query87.sql: 
-------------------------------------------------------------------------------- 1 | select count(*) 2 | from (select distinct c_last_name as l1, c_first_name as f1, d_date as d1 3 | from store_sales 4 | JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk 5 | JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk 6 | where 7 | d_month_seq between 1193 and 1193+11 8 | and ss_sold_date between '1999-06-01' and '2000-05-31' 9 | ) t1 10 | LEFT OUTER JOIN 11 | ( select distinct c_last_name as l2, c_first_name as f2, d_date as d2 12 | from catalog_sales 13 | JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 14 | JOIN customer ON catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 15 | where 16 | d_month_seq between 1193 and 1193+11 17 | and cs_sold_date between '1999-06-01' and '2000-05-31' 18 | ) t2 19 | ON t1.l1 = t2.l2 and 20 | t1.f1 = t2.f2 and 21 | t1.d1 = t2.d2 22 | LEFT OUTER JOIN 23 | (select distinct c_last_name as l3, c_first_name as f3, d_date as d3 24 | from web_sales 25 | JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk 26 | JOIN customer ON web_sales.ws_bill_customer_sk = customer.c_customer_sk 27 | where 28 | d_month_seq between 1193 and 1193+11 29 | and ws_sold_date between '1999-06-01' and '2000-05-31' 30 | ) t3 31 | ON t1.l1 = t3.l3 and 32 | t1.f1 = t3.f3 and 33 | t1.d1 = t3.d3 34 | WHERE 35 | l2 is null and 36 | l3 is null ; 37 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query89.sql: -------------------------------------------------------------------------------- 1 | 2 | select * 3 | from( 4 | select i_category, i_class, i_brand, 5 | s_store_name, s_company_name, 6 | d_moy, 7 | sum(ss_sales_price) sum_sales, 8 | avg(sum(ss_sales_price)) over 9 | (partition by i_category, i_brand, s_store_name, s_company_name) 10 | avg_monthly_sales 11 | from item, store_sales, date_dim, store 12 | where store_sales.ss_item_sk = item.i_item_sk 
and 13 | store_sales.ss_sold_date_sk = date_dim.d_date_sk and 14 | store_sales.ss_store_sk = store.s_store_sk and 15 | store_sales.ss_sold_date between '2000-01-01' and '2000-12-31' and 16 | d_year in (2000) and 17 | ((i_category in ('Home','Books','Electronics') and 18 | i_class in ('wallpaper','parenting','musical') 19 | ) 20 | or (i_category in ('Shoes','Jewelry','Men') and 21 | i_class in ('womens','birdal','pants') 22 | )) 23 | group by i_category, i_class, i_brand, 24 | s_store_name, s_company_name, d_moy) tmp1 25 | where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 26 | order by sum_sales - avg_monthly_sales, s_store_name 27 | limit 100; 28 | 29 | 30 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query90.sql: -------------------------------------------------------------------------------- 1 | 2 | select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio 3 | from ( select count(*) amc 4 | from web_sales, household_demographics , time_dim, web_page 5 | where ws_sold_time_sk = time_dim.t_time_sk 6 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 7 | and ws_web_page_sk = web_page.wp_web_page_sk 8 | and time_dim.t_hour between 6 and 6+1 9 | and household_demographics.hd_dep_count = 8 10 | and web_page.wp_char_count between 5000 and 5200) at, 11 | ( select count(*) pmc 12 | from web_sales, household_demographics , time_dim, web_page 13 | where ws_sold_time_sk = time_dim.t_time_sk 14 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 15 | and ws_web_page_sk = web_page.wp_web_page_sk 16 | and time_dim.t_hour between 14 and 14+1 17 | and household_demographics.hd_dep_count = 8 18 | and web_page.wp_char_count between 5000 and 5200) pt 19 | order by am_pm_ratio 20 | limit 100; 21 | 22 | 23 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query91.sql: 
-------------------------------------------------------------------------------- 1 | 2 | select 3 | cc_call_center_id Call_Center, 4 | cc_name Call_Center_Name, 5 | cc_manager Manager, 6 | sum(cr_net_loss) Returns_Loss 7 | from 8 | call_center, 9 | catalog_returns, 10 | date_dim, 11 | customer, 12 | customer_address, 13 | customer_demographics, 14 | household_demographics 15 | where 16 | catalog_returns.cr_call_center_sk = call_center.cc_call_center_sk 17 | and catalog_returns.cr_returned_date_sk = date_dim.d_date_sk 18 | and catalog_returns.cr_returning_customer_sk= customer.c_customer_sk 19 | and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk 20 | and household_demographics.hd_demo_sk = customer.c_current_hdemo_sk 21 | and customer_address.ca_address_sk = customer.c_current_addr_sk 22 | and d_year = 1999 23 | and d_moy = 11 24 | and cr_returned_date between '1999-11-01' and '1999-11-30' 25 | and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') 26 | or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) 27 | and hd_buy_potential like '0-500%' 28 | and ca_gmt_offset = -7 29 | group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status 30 | order by Returns_Loss desc; 31 | 32 | 33 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query92.sql: -------------------------------------------------------------------------------- 1 | SELECT sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 2 | else 0 end) as store_only, 3 | sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 4 | else 0 end) as catalog_only, 5 | sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 6 | else 0 end) as store_and_catalog 7 | FROM (SELECT ss.ss_customer_sk as customer_sk, 8 | ss.ss_item_sk as item_sk 9 | FROM store_sales ss 10 | JOIN date_dim d1 ON (ss.ss_sold_date_sk = d1.d_date_sk) 11
| WHERE d1.d_month_seq >= 1206 and 12 | d1.d_month_seq <= 1217 13 | and ss_sold_date between '2000-07-01' and '2001-06-30' 14 | GROUP BY ss.ss_customer_sk, ss.ss_item_sk) ssci 15 | FULL OUTER JOIN (SELECT cs.cs_bill_customer_sk as customer_sk, 16 | cs.cs_item_sk as item_sk 17 | FROM catalog_sales cs 18 | JOIN date_dim d2 ON (cs.cs_sold_date_sk = d2.d_date_sk) 19 | WHERE d2.d_month_seq >= 1206 and 20 | d2.d_month_seq <= 1217 21 | and cs_sold_date between '2000-07-01' and '2001-06-30' 22 | GROUP BY cs.cs_bill_customer_sk, cs.cs_item_sk) csci 23 | ON (ssci.customer_sk=csci.customer_sk and 24 | ssci.item_sk = csci.item_sk); 25 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query93.sql: -------------------------------------------------------------------------------- 1 | 2 | select ss_customer_sk 3 | ,sum(act_sales) sumsales 4 | from (select ss_item_sk 5 | ,ss_ticket_number 6 | ,ss_customer_sk 7 | ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price 8 | else (ss_quantity*ss_sales_price) end act_sales 9 | from store_sales left outer join store_returns on (store_returns.sr_item_sk = store_sales.ss_item_sk 10 | and store_returns.sr_ticket_number = store_sales.ss_ticket_number) 11 | ,reason 12 | where store_returns.sr_reason_sk = reason.r_reason_sk 13 | and r_reason_desc = 'Did not like the warranty') t 14 | group by ss_customer_sk 15 | order by sumsales, ss_customer_sk 16 | limit 100; 17 | 18 | 19 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query94.sql: -------------------------------------------------------------------------------- 1 | SELECT count(distinct ws_order_number) as order_count, 2 | sum(ws_ext_ship_cost) as total_shipping_cost, 3 | sum(ws_net_profit) as total_net_profit 4 | FROM web_sales ws1 5 | JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) 6 | JOIN web_site s ON 
(ws1.ws_web_site_sk = s.web_site_sk) 7 | JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) 8 | LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number 9 | FROM web_sales ws2 JOIN web_sales ws3 10 | ON (ws2.ws_order_number = ws3.ws_order_number) 11 | WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk 12 | and ws2.ws_sold_date between '1999-05-01' and '1999-07-01' 13 | and ws3.ws_sold_date between '1999-05-01' and '1999-07-01' 14 | ) ws_wh1 15 | ON (ws1.ws_order_number = ws_wh1.ws_order_number) 16 | LEFT OUTER JOIN web_returns wr1 ON (ws1.ws_order_number = wr1.wr_order_number) 17 | WHERE d.d_date between '1999-05-01' and '1999-07-01' and 18 | ws_sold_date between '1999-05-01' and '1999-07-01' and 19 | ca.ca_state = 'TX' and 20 | s.web_company_name = 'pri' and 21 | wr1.wr_order_number is null 22 | limit 100; 23 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query95.sql: -------------------------------------------------------------------------------- 1 | SELECT count(distinct ws1.ws_order_number) as order_count, 2 | sum(ws1.ws_ext_ship_cost) as total_shipping_cost, 3 | sum(ws1.ws_net_profit) as total_net_profit 4 | FROM web_sales ws1 5 | JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) 6 | JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) 7 | JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) 8 | LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number 9 | FROM web_sales ws2 JOIN web_sales ws3 10 | ON (ws2.ws_order_number = ws3.ws_order_number) 11 | WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk 12 | and ws2.ws_sold_date between '2002-05-01' and '2002-06-30' 13 | and ws3.ws_sold_date between '2002-05-01' and '2002-06-30' 14 | ) ws_wh1 15 | ON (ws1.ws_order_number = ws_wh1.ws_order_number) 16 | LEFT SEMI JOIN (SELECT wr_order_number 17 | FROM web_returns wr 18 | JOIN (SELECT ws4.ws_order_number as ws_order_number 19 | FROM web_sales ws4 JOIN web_sales ws5 20 | ON 
(ws4.ws_order_number = ws5.ws_order_number) 21 | WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk 22 | and ws4.ws_sold_date between '2002-05-01' and '2002-06-30' 23 | and ws5.ws_sold_date between '2002-05-01' and '2002-06-30' 24 | ) ws_wh2 25 | ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 26 | ON (ws1.ws_order_number = tmp1.wr_order_number) 27 | WHERE d.d_date between '2002-05-01' and '2002-06-30' and 28 | ws_sold_date between '2002-05-01' and '2002-06-30' and 29 | ca.ca_state = 'GA' and 30 | s.web_company_name = 'pri'; 31 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query96.sql: -------------------------------------------------------------------------------- 1 | 2 | select count(*) as c 3 | from store_sales 4 | ,household_demographics 5 | ,time_dim, store 6 | where store_sales.ss_sold_time_sk = time_dim.t_time_sk 7 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 8 | and store_sales.ss_store_sk = store.s_store_sk 9 | and time_dim.t_hour = 8 10 | and time_dim.t_minute >= 30 11 | and household_demographics.hd_dep_count = 5 12 | and store.s_store_name = 'ese' 13 | order by c 14 | limit 100; 15 | 16 | 17 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query97.sql: -------------------------------------------------------------------------------- 1 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 2 | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 3 | ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 4 | from 5 | ( select ss_customer_sk customer_sk 6 | ,ss_item_sk item_sk 7 | from store_sales 8 | JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk 9 | where 10 | d_month_seq between 1193 and 1193 + 11 11 | and ss_sold_date between '1999-06-01'
and '2000-05-31' 12 | group by ss_customer_sk ,ss_item_sk) ssci 13 | full outer join 14 | ( select cs_bill_customer_sk customer_sk 15 | ,cs_item_sk item_sk 16 | from catalog_sales 17 | JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 18 | where 19 | d_month_seq between 1193 and 1193 + 11 20 | and cs_sold_date between '1999-06-01' and '2000-05-31' 21 | group by cs_bill_customer_sk ,cs_item_sk) csci 22 | on (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) 23 | limit 100; 24 | 25 | -------------------------------------------------------------------------------- /sample-queries-tpcds/query98.sql: -------------------------------------------------------------------------------- 1 | 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,i_item_id 7 | ,sum(ss_ext_sales_price) as itemrevenue 8 | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from 11 | store_sales 12 | ,item 13 | ,date_dim 14 | where 15 | store_sales.ss_item_sk = item.i_item_sk 16 | and i_category in ('Jewelry', 'Sports', 'Books') 17 | and store_sales.ss_sold_date_sk = date_dim.d_date_sk 18 | and d_date between cast('2001-01-12' as date) 19 | and (cast('2001-02-11' as date)) 20 | and ss_sold_date between '2001-01-12' and '2001-02-11' 21 | group by 22 | i_item_id 23 | ,i_item_desc 24 | ,i_category 25 | ,i_class 26 | ,i_current_price 27 | order by 28 | i_category 29 | ,i_class 30 | ,i_item_id 31 | ,i_item_desc 32 | ,revenueratio; 33 | 34 | 35 | -------------------------------------------------------------------------------- /sample-queries-tpcds/testbench-withATS.settings: -------------------------------------------------------------------------------- 1 | set ambari.hive.db.schema.name=hive; 2 | set fs.file.impl.disable.cache=true; 3 | set fs.hdfs.impl.disable.cache=true; 4 | set hive.auto.convert.join.noconditionaltask=true; 5 | set hive.auto.convert.join=true; 6 | set 
hive.auto.convert.sortmerge.join.noconditionaltask=true; 7 | set hive.auto.convert.sortmerge.join=true; 8 | set hive.compactor.abortedtxn.threshold=1000; 9 | set hive.compactor.check.interval=300; 10 | set hive.compactor.delta.num.threshold=10; 11 | set hive.compactor.delta.pct.threshold=0.1f; 12 | set hive.compactor.initiator.on=false; 13 | set hive.compactor.worker.threads=0; 14 | set hive.compactor.worker.timeout=86400; 15 | set hive.compute.query.using.stats=true; 16 | set hive.enforce.bucketing=true; 17 | set hive.enforce.sorting=true; 18 | set hive.enforce.sortmergebucketmapjoin=true; 19 | set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook; 20 | set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook; 21 | set hive.exec.pre.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook; 22 | set hive.execution.engine=mr; 23 | set hive.limit.pushdown.memory.usage=0.04; 24 | set hive.map.aggr=true; 25 | set hive.mapjoin.bucket.cache.size=10000; 26 | set hive.mapred.reduce.tasks.speculative.execution=false; 27 | set hive.metastore.cache.pinobjtypes=Table,Database,Type,FieldSchema,Order; 28 | set hive.metastore.client.socket.timeout=60; 29 | set hive.metastore.execute.setugi=true; 30 | set hive.metastore.warehouse.dir=/apps/hive/warehouse; 31 | set hive.optimize.bucketmapjoin.sortedmerge=false; 32 | set hive.optimize.bucketmapjoin=true; 33 | set hive.optimize.index.filter=true; 34 | set hive.optimize.mapjoin.mapreduce=true; 35 | set hive.optimize.reducededuplication.min.reducer=4; 36 | set hive.optimize.reducededuplication=true; 37 | set hive.orc.splits.include.file.footer=false; 38 | set hive.security.authorization.enabled=false; 39 | set hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider; 40 | set hive.semantic.analyzer.factory.impl=org.apache.hivealog.cli.HCatSemanticAnalyzerFactory; 41 | set hive.server2.enable.doAs=false; 42 | set hive.server2.tez.default.queues=default; 
43 | set hive.server2.tez.initialize.default.sessions=false; 44 | set hive.server2.tez.sessions.per.default.queue=1; 45 | set hive.stats.autogather=true; 46 | set hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; 47 | set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager; 48 | set hive.txn.max.open.batch=1000; 49 | set hive.txn.timeout=300; 50 | set hive.vectorized.execution.enabled=true; 51 | set hive.vectorized.groupby.checkinterval=1024; 52 | set hive.vectorized.groupby.flush.percent=1; 53 | set hive.vectorized.groupby.maxentries=1024; 54 | 55 | -- These values need to be tuned appropriately to your cluster. These examples are for reference. 56 | -- set hive.tez.container.size=4096; 57 | -- set hive.tez.java.opts=-Xmx3800m; 58 | -- set hive.auto.convert.join.noconditionaltask.size=1252698795; 59 | -------------------------------------------------------------------------------- /sample-queries-tpcds/testbench.settings: -------------------------------------------------------------------------------- 1 | set ambari.hive.db.schema.name=hive; 2 | set fs.file.impl.disable.cache=true; 3 | set fs.hdfs.impl.disable.cache=true; 4 | set hive.auto.convert.join.noconditionaltask=true; 5 | set hive.auto.convert.join=true; 6 | set hive.auto.convert.sortmerge.join.noconditionaltask=true; 7 | set hive.auto.convert.sortmerge.join=true; 8 | set hive.compactor.abortedtxn.threshold=1000; 9 | set hive.compactor.check.interval=300; 10 | set hive.compactor.delta.num.threshold=10; 11 | set hive.compactor.delta.pct.threshold=0.1f; 12 | set hive.compactor.initiator.on=false; 13 | set hive.compactor.worker.threads=0; 14 | set hive.compactor.worker.timeout=86400; 15 | set hive.compute.query.using.stats=true; 16 | set hive.enforce.bucketing=true; 17 | set hive.enforce.sorting=true; 18 | set hive.enforce.sortmergebucketmapjoin=true; 19 | set hive.execution.engine=mr; 20 | set hive.limit.pushdown.memory.usage=0.04; 21 | set hive.map.aggr=true; 22 | set 
hive.mapjoin.bucket.cache.size=10000; 23 | set hive.mapred.reduce.tasks.speculative.execution=false; 24 | set hive.metastore.cache.pinobjtypes=Table,Database,Type,FieldSchema,Order; 25 | set hive.metastore.client.socket.timeout=60; 26 | set hive.metastore.execute.setugi=true; 27 | set hive.metastore.warehouse.dir=/apps/hive/warehouse; 28 | set hive.optimize.bucketmapjoin.sortedmerge=false; 29 | set hive.optimize.bucketmapjoin=true; 30 | set hive.optimize.index.filter=true; 31 | set hive.optimize.mapjoin.mapreduce=true; 32 | set hive.optimize.reducededuplication.min.reducer=4; 33 | set hive.optimize.reducededuplication=true; 34 | set hive.orc.splits.include.file.footer=false; 35 | set hive.security.authorization.enabled=false; 36 | set hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider; 37 | set hive.semantic.analyzer.factory.impl=org.apache.hivealog.cli.HCatSemanticAnalyzerFactory; 38 | set hive.server2.enable.doAs=false; 39 | set hive.server2.tez.default.queues=default; 40 | set hive.server2.tez.initialize.default.sessions=false; 41 | set hive.server2.tez.sessions.per.default.queue=1; 42 | set hive.stats.autogather=true; 43 | set hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; 44 | set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager; 45 | set hive.txn.max.open.batch=1000; 46 | set hive.txn.timeout=300; 47 | set hive.vectorized.execution.enabled=true; 48 | set hive.vectorized.groupby.checkinterval=1024; 49 | set hive.vectorized.groupby.flush.percent=1; 50 | set hive.vectorized.groupby.maxentries=1024; 51 | 52 | -- These values need to be tuned appropriately to your cluster. These examples are for reference. 
53 | -- set hive.tez.container.size=4096; 54 | -- set hive.tez.java.opts=-Xmx3800m; 55 | -- set hive.auto.convert.join.noconditionaltask.size=1252698795; 56 | -------------------------------------------------------------------------------- /sample-queries-tpch/README.md: -------------------------------------------------------------------------------- 1 | Sample TPC-H Queries 2 | ==================== 3 | 4 | This directory contains sample TPC-H queries you can run once you have generated your data. Queries are compatible with Apache Hive 13 and up. 5 | -------------------------------------------------------------------------------- /sample-queries-tpch/testbench-withATS.settings: -------------------------------------------------------------------------------- 1 | set ambari.hive.db.schema.name=hive; 2 | set fs.file.impl.disable.cache=true; 3 | set fs.hdfs.impl.disable.cache=true; 4 | set hive.auto.convert.join.noconditionaltask=true; 5 | set hive.auto.convert.join=true; 6 | set hive.auto.convert.sortmerge.join.noconditionaltask=true; 7 | set hive.auto.convert.sortmerge.join=true; 8 | set hive.compactor.abortedtxn.threshold=1000; 9 | set hive.compactor.check.interval=300; 10 | set hive.compactor.delta.num.threshold=10; 11 | set hive.compactor.delta.pct.threshold=0.1f; 12 | set hive.compactor.initiator.on=false; 13 | set hive.compactor.worker.threads=0; 14 | set hive.compactor.worker.timeout=86400; 15 | set hive.compute.query.using.stats=true; 16 | set hive.enforce.bucketing=true; 17 | set hive.enforce.sorting=true; 18 | set hive.enforce.sortmergebucketmapjoin=true; 19 | set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook; 20 | set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook; 21 | set hive.exec.pre.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook; 22 | set hive.execution.engine=mr; 23 | set hive.limit.pushdown.memory.usage=0.04; 24 | set hive.map.aggr=true; 25 | set hive.mapjoin.bucket.cache.size=10000; 26 | set 
hive.mapred.reduce.tasks.speculative.execution=false; 27 | set hive.metastore.cache.pinobjtypes=Table,Database,Type,FieldSchema,Order; 28 | set hive.metastore.client.socket.timeout=60; 29 | set hive.metastore.execute.setugi=true; 30 | set hive.metastore.warehouse.dir=/apps/hive/warehouse; 31 | set hive.optimize.bucketmapjoin.sortedmerge=false; 32 | set hive.optimize.bucketmapjoin=true; 33 | set hive.optimize.index.filter=true; 34 | set hive.optimize.mapjoin.mapreduce=true; 35 | set hive.optimize.reducededuplication.min.reducer=4; 36 | set hive.optimize.reducededuplication=true; 37 | set hive.orc.splits.include.file.footer=false; 38 | set hive.security.authorization.enabled=false; 39 | set hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider; 40 | set hive.semantic.analyzer.factory.impl=org.apache.hivealog.cli.HCatSemanticAnalyzerFactory; 41 | set hive.server2.enable.doAs=false; 42 | set hive.server2.tez.default.queues=default; 43 | set hive.server2.tez.initialize.default.sessions=false; 44 | set hive.server2.tez.sessions.per.default.queue=1; 45 | set hive.stats.autogather=true; 46 | set hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; 47 | set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager; 48 | set hive.txn.max.open.batch=1000; 49 | set hive.txn.timeout=300; 50 | set hive.vectorized.execution.enabled=true; 51 | set hive.vectorized.groupby.checkinterval=1024; 52 | set hive.vectorized.groupby.flush.percent=1; 53 | set hive.vectorized.groupby.maxentries=1024; 54 | 55 | -- These values need to be tuned appropriately to your cluster. These examples are for reference. 
56 | -- set hive.tez.container.size=4096; 57 | -- set hive.tez.java.opts=-Xmx3800m; 58 | -- set hive.auto.convert.join.noconditionaltask.size=1252698795; 59 | -------------------------------------------------------------------------------- /sample-queries-tpch/testbench.settings: -------------------------------------------------------------------------------- 1 | set ambari.hive.db.schema.name=hive; 2 | set fs.file.impl.disable.cache=true; 3 | set fs.hdfs.impl.disable.cache=true; 4 | set hive.auto.convert.join.noconditionaltask=true; 5 | set hive.auto.convert.join=true; 6 | set hive.auto.convert.sortmerge.join.noconditionaltask=true; 7 | set hive.auto.convert.sortmerge.join=true; 8 | set hive.compactor.abortedtxn.threshold=1000; 9 | set hive.compactor.check.interval=300; 10 | set hive.compactor.delta.num.threshold=10; 11 | set hive.compactor.delta.pct.threshold=0.1f; 12 | set hive.compactor.initiator.on=false; 13 | set hive.compactor.worker.threads=0; 14 | set hive.compactor.worker.timeout=86400; 15 | set hive.compute.query.using.stats=true; 16 | set hive.enforce.bucketing=true; 17 | set hive.enforce.sorting=true; 18 | set hive.enforce.sortmergebucketmapjoin=true; 19 | set hive.execution.engine=mr; 20 | set hive.limit.pushdown.memory.usage=0.04; 21 | set hive.map.aggr=true; 22 | set hive.mapjoin.bucket.cache.size=10000; 23 | set hive.mapred.reduce.tasks.speculative.execution=false; 24 | set hive.metastore.cache.pinobjtypes=Table,Database,Type,FieldSchema,Order; 25 | set hive.metastore.client.socket.timeout=60; 26 | set hive.metastore.execute.setugi=true; 27 | set hive.metastore.warehouse.dir=/apps/hive/warehouse; 28 | set hive.optimize.bucketmapjoin.sortedmerge=false; 29 | set hive.optimize.bucketmapjoin=true; 30 | set hive.optimize.index.filter=true; 31 | set hive.optimize.mapjoin.mapreduce=true; 32 | set hive.optimize.reducededuplication.min.reducer=4; 33 | set hive.optimize.reducededuplication=true; 34 | set hive.orc.splits.include.file.footer=false; 35 | set 
hive.security.authorization.enabled=false; 36 | set hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider; 37 | set hive.semantic.analyzer.factory.impl=org.apache.hivealog.cli.HCatSemanticAnalyzerFactory; 38 | set hive.server2.enable.doAs=false; 39 | set hive.server2.tez.default.queues=default; 40 | set hive.server2.tez.initialize.default.sessions=false; 41 | set hive.server2.tez.sessions.per.default.queue=1; 42 | set hive.stats.autogather=true; 43 | set hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; 44 | set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager; 45 | set hive.txn.max.open.batch=1000; 46 | set hive.txn.timeout=300; 47 | set hive.vectorized.execution.enabled=true; 48 | set hive.vectorized.groupby.checkinterval=1024; 49 | set hive.vectorized.groupby.flush.percent=1; 50 | set hive.vectorized.groupby.maxentries=1024; 51 | 52 | -- These values need to be tuned appropriately to your cluster. These examples are for reference. 
53 | -- set hive.tez.container.size=4096; 54 | -- set hive.tez.java.opts=-Xmx3800m; 55 | -- set hive.auto.convert.join.noconditionaltask.size=1252698795; 56 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query1.sql: -------------------------------------------------------------------------------- 1 | select 2 | l_returnflag, 3 | l_linestatus, 4 | sum(l_quantity) as sum_qty, 5 | sum(l_extendedprice) as sum_base_price, 6 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 7 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 8 | avg(l_quantity) as avg_qty, 9 | avg(l_extendedprice) as avg_price, 10 | avg(l_discount) as avg_disc, 11 | count(*) as count_order 12 | from 13 | lineitem 14 | where 15 | l_shipdate <= '1998-09-16' 16 | group by 17 | l_returnflag, 18 | l_linestatus 19 | order by 20 | l_returnflag, 21 | l_linestatus; 22 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query10.sql: -------------------------------------------------------------------------------- 1 | select 2 | c_custkey, 3 | c_name, 4 | sum(l_extendedprice * (1 - l_discount)) as revenue, 5 | c_acctbal, 6 | n_name, 7 | c_address, 8 | c_phone, 9 | c_comment 10 | from 11 | customer, 12 | orders, 13 | lineitem, 14 | nation 15 | where 16 | c_custkey = o_custkey 17 | and l_orderkey = o_orderkey 18 | and o_orderdate >= '1993-07-01' 19 | and o_orderdate < '1993-10-01' 20 | and l_returnflag = 'R' 21 | and c_nationkey = n_nationkey 22 | group by 23 | c_custkey, 24 | c_name, 25 | c_acctbal, 26 | c_phone, 27 | n_name, 28 | c_address, 29 | c_comment 30 | order by 31 | revenue desc 32 | limit 20; 33 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query11.sql: -------------------------------------------------------------------------------- 1 | drop view q11_part_tmp_cached; 2 | drop view 
q11_sum_tmp_cached; 3 | 4 | create view q11_part_tmp_cached as 5 | select 6 | ps_partkey, 7 | sum(ps_supplycost * ps_availqty) as part_value 8 | from 9 | partsupp, 10 | supplier, 11 | nation 12 | where 13 | ps_suppkey = s_suppkey 14 | and s_nationkey = n_nationkey 15 | and n_name = 'GERMANY' 16 | group by ps_partkey; 17 | 18 | create view q11_sum_tmp_cached as 19 | select 20 | sum(part_value) as total_value 21 | from 22 | q11_part_tmp_cached; 23 | 24 | select 25 | ps_partkey, part_value as value 26 | from ( 27 | select 28 | ps_partkey, 29 | part_value, 30 | total_value 31 | from 32 | q11_part_tmp_cached join q11_sum_tmp_cached 33 | ) a 34 | where 35 | part_value > total_value * 0.0001 36 | order by 37 | value desc; 38 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query12.sql: -------------------------------------------------------------------------------- 1 | select 2 | l_shipmode, 3 | sum(case 4 | when o_orderpriority = '1-URGENT' 5 | or o_orderpriority = '2-HIGH' 6 | then 1 7 | else 0 8 | end) as high_line_count, 9 | sum(case 10 | when o_orderpriority <> '1-URGENT' 11 | and o_orderpriority <> '2-HIGH' 12 | then 1 13 | else 0 14 | end) as low_line_count 15 | from 16 | orders, 17 | lineitem 18 | where 19 | o_orderkey = l_orderkey 20 | and l_shipmode in ('REG AIR', 'MAIL') 21 | and l_commitdate < l_receiptdate 22 | and l_shipdate < l_commitdate 23 | and l_receiptdate >= '1995-01-01' 24 | and l_receiptdate < '1996-01-01' 25 | group by 26 | l_shipmode 27 | order by 28 | l_shipmode; 29 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query13.sql: -------------------------------------------------------------------------------- 1 | select 2 | c_count, 3 | count(*) as custdist 4 | from 5 | ( 6 | select 7 | c_custkey, 8 | count(o_orderkey) as c_count 9 | from 10 | customer left outer join orders on 11 | c_custkey = o_custkey 12 | and o_comment not like 
'%unusual%accounts%' 13 | group by 14 | c_custkey 15 | ) c_orders 16 | group by 17 | c_count 18 | order by 19 | custdist desc, 20 | c_count desc; 21 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query14.sql: -------------------------------------------------------------------------------- 1 | select 2 | 100.00 * sum(case 3 | when p_type like 'PROMO%' 4 | then l_extendedprice * (1 - l_discount) 5 | else 0 6 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 7 | from 8 | lineitem, 9 | part 10 | where 11 | l_partkey = p_partkey 12 | and l_shipdate >= '1995-08-01' 13 | and l_shipdate < '1995-09-01'; 14 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query15.sql: -------------------------------------------------------------------------------- 1 | drop view revenue_cached; 2 | drop view max_revenue_cached; 3 | 4 | create view revenue_cached as 5 | select 6 | l_suppkey as supplier_no, 7 | sum(l_extendedprice * (1 - l_discount)) as total_revenue 8 | from 9 | lineitem 10 | where 11 | l_shipdate >= '1996-01-01' 12 | and l_shipdate < '1996-04-01' 13 | group by l_suppkey; 14 | 15 | create view max_revenue_cached as 16 | select 17 | max(total_revenue) as max_revenue 18 | from 19 | revenue_cached; 20 | 21 | select 22 | s_suppkey, 23 | s_name, 24 | s_address, 25 | s_phone, 26 | total_revenue 27 | from 28 | supplier, 29 | revenue_cached, 30 | max_revenue_cached 31 | where 32 | s_suppkey = supplier_no 33 | and total_revenue = max_revenue 34 | order by s_suppkey; 35 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query16.sql: -------------------------------------------------------------------------------- 1 | select 2 | p_brand, 3 | p_type, 4 | p_size, 5 | count(distinct ps_suppkey) as supplier_cnt 6 | from 7 | partsupp, 8 | part 9 | where 10 | p_partkey = ps_partkey 11 | and p_brand <> 
'Brand#34' 12 | and p_type not like 'ECONOMY BRUSHED%' 13 | and p_size in (22, 14, 27, 49, 21, 33, 35, 28) 14 | and partsupp.ps_suppkey not in ( 15 | select 16 | s_suppkey 17 | from 18 | supplier 19 | where 20 | s_comment like '%Customer%Complaints%' 21 | ) 22 | group by 23 | p_brand, 24 | p_type, 25 | p_size 26 | order by 27 | supplier_cnt desc, 28 | p_brand, 29 | p_type, 30 | p_size; 31 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query17.sql: -------------------------------------------------------------------------------- 1 | drop view q17_lineitem_tmp_cached; 2 | 3 | create view q17_lineitem_tmp_cached as 4 | select 5 | l_partkey as t_partkey, 6 | 0.2 * avg(l_quantity) as t_avg_quantity 7 | from 8 | lineitem 9 | group by l_partkey; 10 | 11 | select 12 | sum(l_extendedprice) / 7.0 as avg_yearly 13 | from ( 14 | select 15 | l_quantity, 16 | l_extendedprice, 17 | t_avg_quantity 18 | from 19 | q17_lineitem_tmp_cached join 20 | (select 21 | l_quantity, 22 | l_partkey, 23 | l_extendedprice 24 | from 25 | part, 26 | lineitem 27 | where 28 | p_partkey = l_partkey 29 | and p_brand = 'Brand#23' 30 | and p_container = 'MED BOX' 31 | ) l1 on l1.l_partkey = t_partkey 32 | ) a 33 | where l_quantity < t_avg_quantity; 34 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query18.sql: -------------------------------------------------------------------------------- 1 | drop view q18_tmp_cached; 2 | drop table q18_large_volume_customer_cached; 3 | 4 | create view q18_tmp_cached as 5 | select 6 | l_orderkey, 7 | sum(l_quantity) as t_sum_quantity 8 | from 9 | lineitem 10 | where 11 | l_orderkey is not null 12 | group by 13 | l_orderkey; 14 | 15 | create table q18_large_volume_customer_cached as 16 | select 17 | c_name, 18 | c_custkey, 19 | o_orderkey, 20 | o_orderdate, 21 | o_totalprice, 22 | sum(l_quantity) 23 | from 24 | customer, 25 | orders, 26 | 
q18_tmp_cached t, 27 | lineitem l 28 | where 29 | c_custkey = o_custkey 30 | and o_orderkey = t.l_orderkey 31 | and o_orderkey is not null 32 | and t.t_sum_quantity > 300 33 | and o_orderkey = l.l_orderkey 34 | and l.l_orderkey is not null 35 | group by 36 | c_name, 37 | c_custkey, 38 | o_orderkey, 39 | o_orderdate, 40 | o_totalprice 41 | order by 42 | o_totalprice desc, 43 | o_orderdate 44 | limit 100; 45 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query19.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l_extendedprice* (1 - l_discount)) as revenue 3 | from 4 | lineitem, 5 | part 6 | where 7 | ( 8 | p_partkey = l_partkey 9 | and p_brand = 'Brand#32' 10 | and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 11 | and l_quantity >= 7 and l_quantity <= 7 + 10 12 | and p_size between 1 and 5 13 | and l_shipmode in ('AIR', 'AIR REG') 14 | and l_shipinstruct = 'DELIVER IN PERSON' 15 | ) 16 | or 17 | ( 18 | p_partkey = l_partkey 19 | and p_brand = 'Brand#35' 20 | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') 21 | and l_quantity >= 15 and l_quantity <= 15 + 10 22 | and p_size between 1 and 10 23 | and l_shipmode in ('AIR', 'AIR REG') 24 | and l_shipinstruct = 'DELIVER IN PERSON' 25 | ) 26 | or 27 | ( 28 | p_partkey = l_partkey 29 | and p_brand = 'Brand#24' 30 | and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 31 | and l_quantity >= 26 and l_quantity <= 26 + 10 32 | and p_size between 1 and 15 33 | and l_shipmode in ('AIR', 'AIR REG') 34 | and l_shipinstruct = 'DELIVER IN PERSON' 35 | ); 36 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query2.sql: -------------------------------------------------------------------------------- 1 | drop view q2_min_ps_supplycost; 2 | create view q2_min_ps_supplycost as 3 | select 4 | p_partkey as min_p_partkey, 5 | 
min(ps_supplycost) as min_ps_supplycost 6 | from 7 | part, 8 | partsupp, 9 | supplier, 10 | nation, 11 | region 12 | where 13 | p_partkey = ps_partkey 14 | and s_suppkey = ps_suppkey 15 | and s_nationkey = n_nationkey 16 | and n_regionkey = r_regionkey 17 | and r_name = 'EUROPE' 18 | group by 19 | p_partkey; 20 | 21 | select 22 | s_acctbal, 23 | s_name, 24 | n_name, 25 | p_partkey, 26 | p_mfgr, 27 | s_address, 28 | s_phone, 29 | s_comment 30 | from 31 | part, 32 | supplier, 33 | partsupp, 34 | nation, 35 | region, 36 | q2_min_ps_supplycost 37 | where 38 | p_partkey = ps_partkey 39 | and s_suppkey = ps_suppkey 40 | and p_size = 37 41 | and p_type like '%COPPER' 42 | and s_nationkey = n_nationkey 43 | and n_regionkey = r_regionkey 44 | and r_name = 'EUROPE' 45 | and ps_supplycost = min_ps_supplycost 46 | and p_partkey = min_p_partkey 47 | order by 48 | s_acctbal desc, 49 | n_name, 50 | s_name, 51 | p_partkey 52 | limit 100; 53 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query20.sql: -------------------------------------------------------------------------------- 1 | drop view q20_tmp1_cached; 2 | drop view q20_tmp2_cached; 3 | drop view q20_tmp3_cached; 4 | drop view q20_tmp4_cached; 5 | 6 | create view q20_tmp1_cached as 7 | select distinct p_partkey 8 | from 9 | part 10 | where 11 | p_name like 'forest%'; 12 | 13 | create view q20_tmp2_cached as 14 | select 15 | l_partkey, 16 | l_suppkey, 17 | 0.5 * sum(l_quantity) as sum_quantity 18 | from 19 | lineitem 20 | where 21 | l_shipdate >= '1994-01-01' 22 | and l_shipdate < '1995-01-01' 23 | group by l_partkey, l_suppkey; 24 | 25 | create view q20_tmp3_cached as 26 | select 27 | ps_suppkey, 28 | ps_availqty, 29 | sum_quantity 30 | from 31 | partsupp, q20_tmp1_cached, q20_tmp2_cached 32 | where 33 | ps_partkey = p_partkey 34 | and ps_partkey = l_partkey 35 | and ps_suppkey = l_suppkey; 36 | 37 | create view q20_tmp4_cached as 38 | select 39 | ps_suppkey 40 | 
from 41 | q20_tmp3_cached 42 | where 43 | ps_availqty > sum_quantity 44 | group by ps_suppkey; 45 | 46 | select 47 | s_name, 48 | s_address 49 | from 50 | supplier, 51 | nation, 52 | q20_tmp4_cached 53 | where 54 | s_nationkey = n_nationkey 55 | and n_name = 'CANADA' 56 | and s_suppkey = ps_suppkey 57 | order by s_name; 58 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query21.sql: -------------------------------------------------------------------------------- 1 | drop view q21_tmp1_cached; 2 | drop view q21_tmp2_cached; 3 | 4 | create view q21_tmp1_cached as 5 | select 6 | l_orderkey, 7 | count(distinct l_suppkey) as count_suppkey, 8 | max(l_suppkey) as max_suppkey 9 | from 10 | lineitem 11 | where 12 | l_orderkey is not null 13 | group by 14 | l_orderkey; 15 | 16 | create view q21_tmp2_cached as 17 | select 18 | l_orderkey, 19 | count(distinct l_suppkey) count_suppkey, 20 | max(l_suppkey) as max_suppkey 21 | from 22 | lineitem 23 | where 24 | l_receiptdate > l_commitdate 25 | and l_orderkey is not null 26 | group by 27 | l_orderkey; 28 | 29 | select 30 | s_name, 31 | count(1) as numwait 32 | from ( 33 | select s_name from ( 34 | select 35 | s_name, 36 | t2.l_orderkey, 37 | l_suppkey, 38 | count_suppkey, 39 | max_suppkey 40 | from 41 | q21_tmp2_cached t2 right outer join ( 42 | select 43 | s_name, 44 | l_orderkey, 45 | l_suppkey from ( 46 | select 47 | s_name, 48 | t1.l_orderkey, 49 | l_suppkey, 50 | count_suppkey, 51 | max_suppkey 52 | from 53 | q21_tmp1_cached t1 join ( 54 | select 55 | s_name, 56 | l_orderkey, 57 | l_suppkey 58 | from 59 | orders o join ( 60 | select 61 | s_name, 62 | l_orderkey, 63 | l_suppkey 64 | from 65 | nation n join supplier s 66 | on 67 | s.s_nationkey = n.n_nationkey 68 | and n.n_name = 'SAUDI ARABIA' 69 | join lineitem l 70 | on 71 | s.s_suppkey = l.l_suppkey 72 | where 73 | l.l_receiptdate > l.l_commitdate 74 | and l.l_orderkey is not null 75 | ) l1 on o.o_orderkey = 
l1.l_orderkey and o.o_orderstatus = 'F' 76 | ) l2 on l2.l_orderkey = t1.l_orderkey 77 | ) a 78 | where 79 | (count_suppkey > 1) 80 | or ((count_suppkey=1) 81 | and (l_suppkey <> max_suppkey)) 82 | ) l3 on l3.l_orderkey = t2.l_orderkey 83 | ) b 84 | where 85 | (count_suppkey is null) 86 | or ((count_suppkey=1) 87 | and (l_suppkey = max_suppkey)) 88 | ) c 89 | group by 90 | s_name 91 | order by 92 | numwait desc, 93 | s_name 94 | limit 100; 95 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query22.sql: -------------------------------------------------------------------------------- 1 | drop view q22_customer_tmp_cached; 2 | drop view q22_customer_tmp1_cached; 3 | drop view q22_orders_tmp_cached; 4 | 5 | create view if not exists q22_customer_tmp_cached as 6 | select 7 | c_acctbal, 8 | c_custkey, 9 | substr(c_phone, 1, 2) as cntrycode 10 | from 11 | customer 12 | where 13 | substr(c_phone, 1, 2) = '13' or 14 | substr(c_phone, 1, 2) = '31' or 15 | substr(c_phone, 1, 2) = '23' or 16 | substr(c_phone, 1, 2) = '29' or 17 | substr(c_phone, 1, 2) = '30' or 18 | substr(c_phone, 1, 2) = '18' or 19 | substr(c_phone, 1, 2) = '17'; 20 | 21 | create view if not exists q22_customer_tmp1_cached as 22 | select 23 | avg(c_acctbal) as avg_acctbal 24 | from 25 | q22_customer_tmp_cached 26 | where 27 | c_acctbal > 0.00; 28 | 29 | create view if not exists q22_orders_tmp_cached as 30 | select 31 | o_custkey 32 | from 33 | orders 34 | group by 35 | o_custkey; 36 | 37 | select 38 | cntrycode, 39 | count(1) as numcust, 40 | sum(c_acctbal) as totacctbal 41 | from ( 42 | select 43 | cntrycode, 44 | c_acctbal, 45 | avg_acctbal 46 | from 47 | q22_customer_tmp1_cached ct1 join ( 48 | select 49 | cntrycode, 50 | c_acctbal 51 | from 52 | q22_orders_tmp_cached ot 53 | right outer join q22_customer_tmp_cached ct 54 | on ct.c_custkey = ot.o_custkey 55 | where 56 | o_custkey is null 57 | ) ct2 58 | ) a 59 | where 60 | c_acctbal > 
avg_acctbal 61 | group by 62 | cntrycode 63 | order by 64 | cntrycode; 65 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query3.sql: -------------------------------------------------------------------------------- 1 | select 2 | l_orderkey, 3 | sum(l_extendedprice * (1 - l_discount)) as revenue, 4 | o_orderdate, 5 | o_shippriority 6 | from 7 | customer, 8 | orders, 9 | lineitem 10 | where 11 | c_mktsegment = 'BUILDING' 12 | and c_custkey = o_custkey 13 | and l_orderkey = o_orderkey 14 | and o_orderdate < '1995-03-22' 15 | and l_shipdate > '1995-03-22' 16 | group by 17 | l_orderkey, 18 | o_orderdate, 19 | o_shippriority 20 | order by 21 | revenue desc, 22 | o_orderdate 23 | limit 10; 24 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query4.sql: -------------------------------------------------------------------------------- 1 | select 2 | o_orderpriority, 3 | count(*) as order_count 4 | from 5 | orders as o 6 | where 7 | o_orderdate >= '1996-05-01' 8 | and o_orderdate < '1996-08-01' 9 | and exists ( 10 | select 11 | * 12 | from 13 | lineitem 14 | where 15 | l_orderkey = o.o_orderkey 16 | and l_commitdate < l_receiptdate 17 | ) 18 | group by 19 | o_orderpriority 20 | order by 21 | o_orderpriority; 22 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query5.sql: -------------------------------------------------------------------------------- 1 | select 2 | n_name, 3 | sum(l_extendedprice * (1 - l_discount)) as revenue 4 | from 5 | customer, 6 | orders, 7 | lineitem, 8 | supplier, 9 | nation, 10 | region 11 | where 12 | c_custkey = o_custkey 13 | and l_orderkey = o_orderkey 14 | and l_suppkey = s_suppkey 15 | and c_nationkey = s_nationkey 16 | and s_nationkey = n_nationkey 17 | and n_regionkey = r_regionkey 18 | and r_name = 'AFRICA' 19 | and o_orderdate >= '1993-01-01' 20 | and 
o_orderdate < '1994-01-01' 21 | group by 22 | n_name 23 | order by 24 | revenue desc; 25 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query6.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l_extendedprice * l_discount) as revenue 3 | from 4 | lineitem 5 | where 6 | l_shipdate >= '1993-01-01' 7 | and l_shipdate < '1994-01-01' 8 | and l_discount between 0.06 - 0.01 and 0.06 + 0.01 9 | and l_quantity < 25; 10 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query7.sql: -------------------------------------------------------------------------------- 1 | select 2 | supp_nation, 3 | cust_nation, 4 | l_year, 5 | sum(volume) as revenue 6 | from 7 | ( 8 | select 9 | n1.n_name as supp_nation, 10 | n2.n_name as cust_nation, 11 | year(l_shipdate) as l_year, 12 | l_extendedprice * (1 - l_discount) as volume 13 | from 14 | supplier, 15 | lineitem, 16 | orders, 17 | customer, 18 | nation n1, 19 | nation n2 20 | where 21 | s_suppkey = l_suppkey 22 | and o_orderkey = l_orderkey 23 | and c_custkey = o_custkey 24 | and s_nationkey = n1.n_nationkey 25 | and c_nationkey = n2.n_nationkey 26 | and ( 27 | (n1.n_name = 'KENYA' and n2.n_name = 'PERU') 28 | or (n1.n_name = 'PERU' and n2.n_name = 'KENYA') 29 | ) 30 | and l_shipdate between '1995-01-01' and '1996-12-31' 31 | ) as shipping 32 | group by 33 | supp_nation, 34 | cust_nation, 35 | l_year 36 | order by 37 | supp_nation, 38 | cust_nation, 39 | l_year; 40 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query8.sql: -------------------------------------------------------------------------------- 1 | select 2 | o_year, 3 | sum(case 4 | when nation = 'PERU' then volume 5 | else 0 6 | end) / sum(volume) as mkt_share 7 | from 8 | ( 9 | select 10 | year(o_orderdate) as o_year, 11 | l_extendedprice * (1 - 
l_discount) as volume, 12 | n2.n_name as nation 13 | from 14 | part, 15 | supplier, 16 | lineitem, 17 | orders, 18 | customer, 19 | nation n1, 20 | nation n2, 21 | region 22 | where 23 | p_partkey = l_partkey 24 | and s_suppkey = l_suppkey 25 | and l_orderkey = o_orderkey 26 | and o_custkey = c_custkey 27 | and c_nationkey = n1.n_nationkey 28 | and n1.n_regionkey = r_regionkey 29 | and r_name = 'AMERICA' 30 | and s_nationkey = n2.n_nationkey 31 | and o_orderdate between '1995-01-01' and '1996-12-31' 32 | and p_type = 'ECONOMY BURNISHED NICKEL' 33 | ) as all_nations 34 | group by 35 | o_year 36 | order by 37 | o_year; 38 | -------------------------------------------------------------------------------- /sample-queries-tpch/tpch_query9.sql: -------------------------------------------------------------------------------- 1 | select 2 | nation, 3 | o_year, 4 | sum(amount) as sum_profit 5 | from 6 | ( 7 | select 8 | n_name as nation, 9 | year(o_orderdate) as o_year, 10 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount 11 | from 12 | part, 13 | supplier, 14 | lineitem, 15 | partsupp, 16 | orders, 17 | nation 18 | where 19 | s_suppkey = l_suppkey 20 | and ps_suppkey = l_suppkey 21 | and ps_partkey = l_partkey 22 | and p_partkey = l_partkey 23 | and o_orderkey = l_orderkey 24 | and s_nationkey = n_nationkey 25 | and p_name like '%plum%' 26 | ) as profit 27 | group by 28 | nation, 29 | o_year 30 | order by 31 | nation, 32 | o_year desc; 33 | -------------------------------------------------------------------------------- /settings/init.sql: -------------------------------------------------------------------------------- 1 | set hive.map.aggr=true; 2 | set mapreduce.reduce.speculative=false; 3 | set hive.auto.convert.join=true; 4 | set hive.optimize.reducededuplication.min.reducer=1; 5 | set hive.optimize.mapjoin.mapreduce=true; 6 | set hive.stats.autogather=true; 7 | 8 | set mapred.reduce.parallel.copies=30; 9 | set 
mapred.job.shuffle.input.buffer.percent=0.5; 10 | set mapred.job.reduce.input.buffer.percent=0.2; 11 | set mapred.map.child.java.opts=-server -Xmx2800m -Djava.net.preferIPv4Stack=true; 12 | set mapred.reduce.child.java.opts=-server -Xmx3800m -Djava.net.preferIPv4Stack=true; 13 | set mapreduce.map.memory.mb=3072; 14 | set mapreduce.reduce.memory.mb=4096; 15 | -------------------------------------------------------------------------------- /settings/load-flat.sql: -------------------------------------------------------------------------------- 1 | set hive.enforce.bucketing=true; 2 | set hive.enforce.sorting=true; 3 | set hive.exec.dynamic.partition.mode=nonstrict; 4 | set hive.exec.max.dynamic.partitions.pernode=1000000; 5 | set hive.exec.max.dynamic.partitions=1000000; 6 | set hive.exec.max.created.files=1000000; 7 | 8 | set mapreduce.input.fileinputformat.split.minsize=240000000; 9 | set mapreduce.input.fileinputformat.split.maxsize=240000000; 10 | set mapreduce.input.fileinputformat.split.minsize.per.node=240000000; 11 | set mapreduce.input.fileinputformat.split.minsize.per.rack=240000000; 12 | set hive.exec.parallel=true; 13 | set hive.stats.autogather=true; 14 | -------------------------------------------------------------------------------- /settings/load-partitioned.sql: -------------------------------------------------------------------------------- 1 | set hive.enforce.bucketing=true; 2 | set hive.enforce.sorting=true; 3 | set hive.exec.dynamic.partition.mode=nonstrict; 4 | set hive.exec.max.dynamic.partitions.pernode=100000; 5 | set hive.exec.max.dynamic.partitions=100000; 6 | set hive.exec.max.created.files=1000000; 7 | set hive.exec.parallel=true; 8 | set hive.exec.reducers.max=2000; 9 | set hive.stats.autogather=true; 10 | set hive.optimize.sort.dynamic.partition=true; 11 | 12 | set mapred.job.reduce.input.buffer.percent=0.0; 13 | set mapreduce.input.fileinputformat.split.minsize=240000000; 14 | set
mapreduce.input.fileinputformat.split.minsize.per.node=240000000; 15 | set mapreduce.input.fileinputformat.split.minsize.per.rack=240000000; 16 | 17 | -- set mapred.map.child.java.opts=-server -Xmx2800m -Djava.net.preferIPv4Stack=true; 18 | -- set mapred.reduce.child.java.opts=-server -Xms1024m -Xmx3800m -Djava.net.preferIPv4Stack=true; 19 | -- set mapreduce.map.memory.mb=3072; 20 | -- set mapreduce.reduce.memory.mb=4096; 21 | -- set io.sort.mb=800; 22 | -------------------------------------------------------------------------------- /tpcds-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Check for all the stuff I need to function. 4 | for f in gcc javac; do 5 | which $f > /dev/null 2>&1 6 | if [ $? -ne 0 ]; then 7 | echo "Required program $f is missing. Please install or fix your path and try again." 8 | exit 1 9 | fi 10 | done 11 | 12 | # Check if Maven is installed and install it if not. 13 | which mvn > /dev/null 2>&1 14 | if [ $? -ne 0 ]; then 15 | SKIP=0 16 | if [ -e "apache-maven-3.0.5-bin.tar.gz" ]; then 17 | SIZE=`du -b apache-maven-3.0.5-bin.tar.gz | cut -f 1` 18 | if [ $SIZE -eq 5144659 ]; then 19 | SKIP=1 20 | fi 21 | fi 22 | if [ $SKIP -ne 1 ]; then 23 | echo "Maven not found, automatically installing it." 24 | curl -f -L -O https://archive.apache.org/dist/maven/maven-3/3.0.5/binaries/apache-maven-3.0.5-bin.tar.gz 2> /dev/null 25 | if [ $? -ne 0 ]; then 26 | echo "Failed to download Maven, check Internet connectivity and try again." 27 | exit 1 28 | fi 29 | fi 30 | tar -zxf apache-maven-3.0.5-bin.tar.gz > /dev/null 31 | CWD=$(pwd) 32 | export MAVEN_HOME="$CWD/apache-maven-3.0.5" 33 | export PATH=$PATH:$MAVEN_HOME/bin 34 | fi 35 | 36 | echo "Building TPC-DS Data Generator" 37 | (cd tpcds-gen && make) || exit 1 38 | echo "TPC-DS Data Generator built, you can now use tpcds-setup.sh to generate data."
39 | -------------------------------------------------------------------------------- /tpcds-gen/Makefile: -------------------------------------------------------------------------------- 1 | MYOS=$(shell uname -s) 2 | 3 | all: target/lib/dsdgen.jar target/tpcds-gen-1.0-SNAPSHOT.jar 4 | 5 | target/tpcds-gen-1.0-SNAPSHOT.jar: $(shell find . -name '*.java') 6 | mvn package 7 | 8 | target/tpcds_kit.zip: tpcds_kit.zip 9 | mkdir -p target/ 10 | cp tpcds_kit.zip target/tpcds_kit.zip 11 | 12 | tpcds_kit.zip: 13 | curl --output tpcds_kit.zip 'http://www.tpc.org/tpcds/dsgen/dsgen-download-files.asp?download_key=NaN' 14 | 15 | target/lib/dsdgen.jar: target/tools/dsdgen 16 | cd target/; mkdir -p lib/; ( jar cvf lib/dsdgen.jar tools/ || gjar cvf lib/dsdgen.jar tools/ ) 17 | 18 | target/tools/dsdgen: target/tpcds_kit.zip 19 | test -d target/tools/ || (cd target; unzip tpcds_kit.zip; mv TPC*/tools tools; cd tools; cat ../../patches/all/*.patch | patch -p0 ) 20 | cd target/tools; cat ../../patches/${MYOS}/*.patch | patch -p1 21 | cd target/tools; make clean; make dsdgen 22 | 23 | clean: 24 | mvn clean 25 | -------------------------------------------------------------------------------- /tpcds-gen/README.md: -------------------------------------------------------------------------------- 1 | Mapreduce TPC-DS Generator 2 | ========================== 3 | 4 | This simplifies creating tpc-ds data-sets on large scales on a hadoop cluster. 5 | 6 | To get set up, you need to run 7 | 8 | $ make 9 | 10 | this will download the TPC-DS dsgen program, compile it and use maven to build the MR app wrapped around it. 11 | 12 | To generate the data-sets, you need to run (say, for scale = 200, parallelism = 100) 13 | 14 | $ hadoop jar target/tpcds-gen-1.0-SNAPSHOT.jar -d /tmp/store_sales/200/ -p 100 -s 200 15 | 16 | This uses the existing parallelism in the driver.c of TPC-DS without modification and uses it to run the command on multiple machines instead of running in local fork mode.
17 | 18 | The command generates multiple files for each map task, resulting in each table having its own subdirectory. 19 | 20 | Assumptions made are that all machines in the cluster are OS/arch/lib identical. 21 | -------------------------------------------------------------------------------- /tpcds-gen/patches/all/tpcds-buffered.patch: -------------------------------------------------------------------------------- 1 | diff --git print.c print.c 2 | index 1b64362..5108bd7 100644 3 | --- print.c 4 | +++ print.c 5 | @@ -68,6 +68,7 @@ print_close(int tbl) 6 | fpOutfile = NULL; 7 | if (pTdef->outfile) 8 | { 9 | + fflush(pTdef->outfile); 10 | fclose(pTdef->outfile); 11 | pTdef->outfile = NULL; 12 | } 13 | @@ -536,7 +537,7 @@ print_end (int tbl) 14 | if (add_term) 15 | fwrite(term, 1, add_term, fpOutfile); 16 | fprintf (fpOutfile, "\n"); 17 | - fflush(fpOutfile); 18 | + //fflush(fpOutfile); 19 | 20 | return (res); 21 | } 22 | -------------------------------------------------------------------------------- /tpcds-gen/patches/all/tpcds-strcpy.patch: -------------------------------------------------------------------------------- 1 | diff --git r_params.c r_params.c 2 | index 4db16e5..9b1a8e6 100644 3 | --- r_params.c 4 | +++ r_params.c 5 | @@ -46,7 +46,7 @@ 6 | #include "tdefs.h" 7 | #include "release.h" 8 | 9 | -#define PARAM_MAX_LEN 80 10 | +#define PARAM_MAX_LEN PATH_MAX 11 | 12 | #ifndef TEST 13 | extern option_t options[]; 14 | @@ -275,7 +275,7 @@ set_str(char *var, char *val) 15 | nParam = fnd_param(var); 16 | if (nParam >= 0) 17 | { 18 | - strcpy(params[options[nParam].index], val); 19 | + strncpy(params[options[nParam].index], val, PARAM_MAX_LEN); 20 | options[nParam].flags |= OPT_SET; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /tpcds-gen/patches/all/tpcds_misspelled_header_guard.patch: -------------------------------------------------------------------------------- 1 | --- w_store_sales.h.orig 2014-06-25
10:58:19.000000000 -0700 2 | +++ w_store_sales.h 2014-06-25 10:58:51.000000000 -0700 3 | @@ -34,7 +34,7 @@ 4 | * Gradient Systems 5 | */ 6 | #ifndef W_STORE_SALES_H 7 | -#define W_STORE_SLAES_H 8 | +#define W_STORE_SALES_H 9 | 10 | #include "constants.h" 11 | #include "pricing.h" 12 | -------------------------------------------------------------------------------- /tpcds-gen/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | 8 | 4.0.0 9 | 10 | org.notmysock.tpcds 11 | tpcds-gen 12 | 1.0-SNAPSHOT 13 | jar 14 | 15 | tpcds-gen 16 | http://maven.apache.org 17 | 18 | 19 | 20 | org.apache.hadoop 21 | hadoop-client 22 | 2.2.0 23 | compile 24 | 25 | 26 | commons-cli 27 | commons-cli 28 | 1.1 29 | compile 30 | 31 | 32 | org.mockito 33 | mockito-core 34 | 1.8.5 35 | test 36 | 37 | 38 | junit 39 | junit 40 | 4.7 41 | test 42 | 43 | 44 | 45 | 46 | 47 | 48 | maven-compiler-plugin 49 | 50 | 1.6 51 | 1.6 52 | 53 | 54 | 55 | org.apache.maven.plugins 56 | maven-jar-plugin 57 | 58 | 59 | 60 | true 61 | lib/ 62 | org.notmysock.tpcds.GenTable 63 | 64 | 65 | 66 | 67 | 68 | org.apache.maven.plugins 69 | maven-dependency-plugin 70 | 71 | 72 | copy-dependencies 73 | package 74 | 75 | copy-dependencies 76 | 77 | 78 | ${project.build.directory}/lib 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /tpch-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Check for all the stuff I need to function. 4 | for f in gcc javac; do 5 | which $f > /dev/null 2>&1 6 | if [ $? -ne 0 ]; then 7 | echo "Required program $f is missing. Please install or fix your path and try again." 8 | exit 1 9 | fi 10 | done 11 | 12 | # Check if Maven is installed and install it if not. 13 | which mvn > /dev/null 2>&1 14 | if [ $? 
-ne 0 ]; then 15 | SKIP=0 16 | if [ -e "apache-maven-3.0.5-bin.tar.gz" ]; then 17 | SIZE=`du -b apache-maven-3.0.5-bin.tar.gz | cut -f 1` 18 | if [ $SIZE -eq 5144659 ]; then 19 | SKIP=1 20 | fi 21 | fi 22 | if [ $SKIP -ne 1 ]; then 23 | echo "Maven not found, automatically installing it." 24 | curl -O http://www.us.apache.org/dist/maven/maven-3/3.0.5/binaries/apache-maven-3.0.5-bin.tar.gz 2> /dev/null 25 | if [ $? -ne 0 ]; then 26 | echo "Failed to download Maven, check Internet connectivity and try again." 27 | exit 1 28 | fi 29 | fi 30 | tar -zxf apache-maven-3.0.5-bin.tar.gz > /dev/null 31 | CWD=$(pwd) 32 | export MAVEN_HOME="$CWD/apache-maven-3.0.5" 33 | export PATH=$PATH:$MAVEN_HOME/bin 34 | fi 35 | 36 | echo "Building TPC-H Data Generator" 37 | (cd tpch-gen; make) 38 | echo "TPC-H Data Generator built, you can now use tpch-setup.sh to generate data." 39 | -------------------------------------------------------------------------------- /tpch-gen/Makefile: -------------------------------------------------------------------------------- 1 | MYOS=$(shell uname -s) 2 | 3 | all: target/lib/dbgen.jar target/tpch-gen-1.0-SNAPSHOT.jar 4 | 5 | target/tpch-gen-1.0-SNAPSHOT.jar: $(shell find -name *.java) 6 | mvn package 7 | 8 | target/tpch_kit.zip: tpch_kit.zip 9 | mkdir -p target/ 10 | cp tpch_kit.zip target/tpch_kit.zip 11 | 12 | tpch_kit.zip: 13 | curl --output tpch_kit.zip http://www.tpc.org/tpch/spec/tpch_2_16_0.zip 14 | 15 | target/lib/dbgen.jar: target/tools/dbgen 16 | cd target/; mkdir -p lib/; ( jar cvf lib/dbgen.jar tools/ || gjar cvf lib/dbgen.jar tools/ ) 17 | 18 | target/tools/dbgen: target/tpch_kit.zip 19 | test -d target/tools/ || (cd target; unzip tpch_kit.zip -x __MACOSX/; ln -sf $$PWD/*/dbgen/ tools) 20 | cd target/tools; cat ../../../patches/${MYOS}/*.patch | patch -p0 21 | cd target/tools; make -f makefile.suite clean; make -f makefile.suite CC=gcc DATABASE=ORACLE MACHINE=LINUX WORKLOAD=TPCH 22 | 23 | clean: 24 | mvn clean 25 | 
-------------------------------------------------------------------------------- /tpch-gen/README.md: -------------------------------------------------------------------------------- 1 | MapReduce TPC-H Generator 2 | ========================= 3 | 4 | This simplifies creating TPC-H data sets at large scale on a Hadoop cluster. 5 | 6 | To get set up, you need to run 7 | 8 | $ make 9 | 10 | This will download the TPC-H dbgen program, compile it, and use Maven to build the MapReduce application wrapped around it. 11 | 12 | To generate the data sets, you need to run (say, for scale = 200, parallelism = 100) 13 | 14 | $ hadoop jar target/tpch-gen-1.0-SNAPSHOT.jar -d /user/hive/external/200/ -p 100 -s 200 15 | 16 | This uses the existing parallelism in the dbgen program without modification and uses it to run the command on multiple machines. 17 | 18 | The command generates multiple files for each map task, resulting in each table having its own subdirectory. 19 | 20 | This assumes that all machines in the cluster have an identical OS, architecture, and set of libraries. 
21 | -------------------------------------------------------------------------------- /tpch-gen/ddl/orc.sql: -------------------------------------------------------------------------------- 1 | set hive.stats.autogather=true; 2 | set hive.stats.dbclass=fs; 3 | 4 | create table if not exists lineitem 5 | (L_ORDERKEY BIGINT, 6 | L_PARTKEY BIGINT, 7 | L_SUPPKEY BIGINT, 8 | L_LINENUMBER INT, 9 | L_QUANTITY DOUBLE, 10 | L_EXTENDEDPRICE DOUBLE, 11 | L_DISCOUNT DOUBLE, 12 | L_TAX DOUBLE, 13 | L_RETURNFLAG STRING, 14 | L_LINESTATUS STRING, 15 | L_SHIPDATE STRING, 16 | L_COMMITDATE STRING, 17 | L_RECEIPTDATE STRING, 18 | L_SHIPINSTRUCT STRING, 19 | L_SHIPMODE STRING, 20 | L_COMMENT STRING) 21 | STORED AS ORC TBLPROPERTIES ("orc.compress"="SNAPPY") 22 | ; 23 | 24 | create table if not exists part (P_PARTKEY BIGINT, 25 | P_NAME STRING, 26 | P_MFGR STRING, 27 | P_BRAND STRING, 28 | P_TYPE STRING, 29 | P_SIZE INT, 30 | P_CONTAINER STRING, 31 | P_RETAILPRICE DOUBLE, 32 | P_COMMENT STRING) 33 | STORED AS ORC TBLPROPERTIES ("orc.compress"="SNAPPY") 34 | ; 35 | 36 | create table if not exists supplier (S_SUPPKEY BIGINT, 37 | S_NAME STRING, 38 | S_ADDRESS STRING, 39 | S_NATIONKEY INT, 40 | S_PHONE STRING, 41 | S_ACCTBAL DOUBLE, 42 | S_COMMENT STRING) 43 | STORED AS ORC TBLPROPERTIES ("orc.compress"="SNAPPY") 44 | ; 45 | 46 | create table if not exists partsupp (PS_PARTKEY BIGINT, 47 | PS_SUPPKEY BIGINT, 48 | PS_AVAILQTY INT, 49 | PS_SUPPLYCOST DOUBLE, 50 | PS_COMMENT STRING) 51 | STORED AS ORC TBLPROPERTIES ("orc.compress"="SNAPPY") 52 | ; 53 | 54 | create table if not exists nation (N_NATIONKEY INT, 55 | N_NAME STRING, 56 | N_REGIONKEY INT, 57 | N_COMMENT STRING) 58 | STORED AS ORC TBLPROPERTIES ("orc.compress"="SNAPPY") 59 | ; 60 | 61 | create table if not exists region (R_REGIONKEY INT, 62 | R_NAME STRING, 63 | R_COMMENT STRING) 64 | STORED AS ORC TBLPROPERTIES ("orc.compress"="SNAPPY") 65 | ; 66 | 67 | create table if not exists customer (C_CUSTKEY BIGINT, 68 | C_NAME STRING, 69 | 
C_ADDRESS STRING, 70 | C_NATIONKEY INT, 71 | C_PHONE STRING, 72 | C_ACCTBAL DOUBLE, 73 | C_MKTSEGMENT STRING, 74 | C_COMMENT STRING) 75 | STORED AS ORC TBLPROPERTIES ("orc.compress"="SNAPPY") 76 | ; 77 | 78 | create table if not exists orders (O_ORDERKEY BIGINT, 79 | O_CUSTKEY BIGINT, 80 | O_ORDERSTATUS STRING, 81 | O_TOTALPRICE DOUBLE, 82 | O_ORDERDATE STRING, 83 | O_ORDERPRIORITY STRING, 84 | O_CLERK STRING, 85 | O_SHIPPRIORITY INT, 86 | O_COMMENT STRING) 87 | STORED AS ORC TBLPROPERTIES ("orc.compress"="SNAPPY") 88 | ; 89 | 90 | insert overwrite table nation select * from ${SOURCE}.nation; 91 | insert overwrite table region select * from ${SOURCE}.region; 92 | insert overwrite table part select * from ${SOURCE}.part; 93 | insert overwrite table supplier select * from ${SOURCE}.supplier; 94 | insert overwrite table partsupp select * from ${SOURCE}.partsupp; 95 | insert overwrite table customer select * from ${SOURCE}.customer; 96 | insert overwrite table lineitem select * from ${SOURCE}.lineitem; 97 | insert overwrite table orders select * from ${SOURCE}.orders; 98 | -------------------------------------------------------------------------------- /tpch-gen/ddl/text.sql: -------------------------------------------------------------------------------- 1 | create external table lineitem 2 | (L_ORDERKEY BIGINT, 3 | L_PARTKEY BIGINT, 4 | L_SUPPKEY BIGINT, 5 | L_LINENUMBER INT, 6 | L_QUANTITY DOUBLE, 7 | L_EXTENDEDPRICE DOUBLE, 8 | L_DISCOUNT DOUBLE, 9 | L_TAX DOUBLE, 10 | L_RETURNFLAG STRING, 11 | L_LINESTATUS STRING, 12 | L_SHIPDATE STRING, 13 | L_COMMITDATE STRING, 14 | L_RECEIPTDATE STRING, 15 | L_SHIPINSTRUCT STRING, 16 | L_SHIPMODE STRING, 17 | L_COMMENT STRING) 18 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 19 | LOCATION '${LOCATION}/lineitem'; 20 | 21 | create external table part (P_PARTKEY BIGINT, 22 | P_NAME STRING, 23 | P_MFGR STRING, 24 | P_BRAND STRING, 25 | P_TYPE STRING, 26 | P_SIZE INT, 27 | P_CONTAINER STRING, 28 | P_RETAILPRICE 
DOUBLE, 29 | P_COMMENT STRING) 30 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 31 | LOCATION '${LOCATION}/part/'; 32 | 33 | create external table supplier (S_SUPPKEY BIGINT, 34 | S_NAME STRING, 35 | S_ADDRESS STRING, 36 | S_NATIONKEY INT, 37 | S_PHONE STRING, 38 | S_ACCTBAL DOUBLE, 39 | S_COMMENT STRING) 40 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 41 | LOCATION '${LOCATION}/supplier/'; 42 | 43 | create external table partsupp (PS_PARTKEY BIGINT, 44 | PS_SUPPKEY BIGINT, 45 | PS_AVAILQTY INT, 46 | PS_SUPPLYCOST DOUBLE, 47 | PS_COMMENT STRING) 48 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 49 | LOCATION '${LOCATION}/partsupp'; 50 | 51 | create external table nation (N_NATIONKEY INT, 52 | N_NAME STRING, 53 | N_REGIONKEY INT, 54 | N_COMMENT STRING) 55 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 56 | LOCATION '${LOCATION}/nation'; 57 | 58 | create external table region (R_REGIONKEY INT, 59 | R_NAME STRING, 60 | R_COMMENT STRING) 61 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 62 | LOCATION '${LOCATION}/region'; 63 | 64 | create external table customer (C_CUSTKEY BIGINT, 65 | C_NAME STRING, 66 | C_ADDRESS STRING, 67 | C_NATIONKEY INT, 68 | C_PHONE STRING, 69 | C_ACCTBAL DOUBLE, 70 | C_MKTSEGMENT STRING, 71 | C_COMMENT STRING) 72 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 73 | LOCATION '${LOCATION}/customer'; 74 | 75 | create external table orders (O_ORDERKEY BIGINT, 76 | O_CUSTKEY BIGINT, 77 | O_ORDERSTATUS STRING, 78 | O_TOTALPRICE DOUBLE, 79 | O_ORDERDATE STRING, 80 | O_ORDERPRIORITY STRING, 81 | O_CLERK STRING, 82 | O_SHIPPRIORITY INT, 83 | O_COMMENT STRING) 84 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE 85 | LOCATION '${LOCATION}/orders'; 86 | -------------------------------------------------------------------------------- /tpch-gen/patches/Darwin/macosx.patch: 
-------------------------------------------------------------------------------- 1 | --- makefile.suite.orig 2014-06-25 15:40:27.000000000 -0700 2 | +++ makefile.suite 2014-06-25 15:42:03.000000000 -0700 3 | @@ -110,7 +110,7 @@ 4 | MACHINE = 5 | WORKLOAD = 6 | # 7 | -CFLAGS = -g -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) -DRNG_TEST -D_FILE_OFFSET_BITS=64 8 | +CFLAGS = -g -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) -DRNG_TEST -D_FILE_OFFSET_BITS=64 -I/usr/include/malloc 9 | LDFLAGS = -O 10 | # The OBJ,EXE and LIB macros will need to be changed for compilation under 11 | # Windows NT 12 | -------------------------------------------------------------------------------- /tpch-gen/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | 8 | 4.0.0 9 | 10 | org.notmysock.tpch 11 | tpch-gen 12 | 1.0-SNAPSHOT 13 | jar 14 | 15 | tpch-gen 16 | http://maven.apache.org 17 | 18 | 19 | 20 | org.apache.hadoop 21 | hadoop-client 22 | 2.4.0 23 | compile 24 | 25 | 26 | commons-cli 27 | commons-cli 28 | 1.1 29 | compile 30 | 31 | 32 | org.mockito 33 | mockito-core 34 | 1.8.5 35 | test 36 | 37 | 38 | junit 39 | junit 40 | 4.7 41 | test 42 | 43 | 44 | 45 | 46 | 47 | 48 | maven-compiler-plugin 49 | 50 | 1.6 51 | 1.6 52 | 53 | 54 | 55 | org.apache.maven.plugins 56 | maven-jar-plugin 57 | 58 | 59 | 60 | true 61 | lib/ 62 | org.notmysock.tpch.GenTable 63 | 64 | 65 | 66 | 67 | 68 | org.apache.maven.plugins 69 | maven-dependency-plugin 70 | 71 | 72 | copy-dependencies 73 | package 74 | 75 | copy-dependencies 76 | 77 | 78 | ${project.build.directory}/lib 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /tpch-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage { 4 | echo "Usage: tpch-setup.sh scale_factor [temp_directory]" 5 | exit 1 6 | } 7 | 8 | function 
runcommand { 9 | if [ "X$DEBUG_SCRIPT" != "X" ]; then 10 | $1 11 | else 12 | $1 2>/dev/null 13 | fi 14 | } 15 | 16 | if [ ! -f tpch-gen/target/tpch-gen-1.0-SNAPSHOT.jar ]; then 17 | echo "Please build the data generator with ./tpch-build.sh first" 18 | exit 1 19 | fi 20 | which hive > /dev/null 2>&1 21 | if [ $? -ne 0 ]; then 22 | echo "Script must be run where Hive is installed" 23 | exit 1 24 | fi 25 | 26 | # Tables in the TPC-H schema. 27 | TABLES="part partsupp supplier customer orders lineitem nation region" 28 | 29 | # Get the parameters. 30 | SCALE=$1 31 | DIR=$2 32 | BUCKETS=13 33 | if [ "X$DEBUG_SCRIPT" != "X" ]; then 34 | set -x 35 | fi 36 | 37 | # Sanity checking. 38 | if [ X"$SCALE" = "X" ]; then 39 | usage 40 | fi 41 | if [ X"$DIR" = "X" ]; then 42 | DIR=/tmp/tpch-generate 43 | fi 44 | if [ "$SCALE" -le 1 ]; then # reject 1, 0 and negative scale factors, as the message states 45 | echo "Scale factor must be greater than 1" 46 | exit 1 47 | fi 48 | 49 | # Do the actual data load. 50 | hdfs dfs -mkdir -p ${DIR} 51 | hdfs dfs -ls ${DIR}/${SCALE} > /dev/null 52 | if [ $? -ne 0 ]; then 53 | echo "Generating data at scale factor $SCALE." 54 | (cd tpch-gen; hadoop jar target/*.jar -d ${DIR}/${SCALE}/ -s ${SCALE}) 55 | fi 56 | hdfs dfs -ls ${DIR}/${SCALE} > /dev/null 57 | if [ $? -ne 0 ]; then 58 | echo "Data generation failed, exiting." 59 | exit 1 60 | fi 61 | echo "TPC-H text data generation complete." 62 | 63 | # Create the text/flat tables as external tables. These will be later be converted to ORCFile. 64 | echo "Loading text data into external tables." 65 | runcommand "hive -i settings/load-flat.sql -f ddl-tpch/bin_flat/alltables.sql -d DB=tpch_text_${SCALE} -d LOCATION=${DIR}/${SCALE}" 66 | 67 | # Create the optimized tables. 68 | i=1 69 | total=8 70 | DATABASE=tpch_flat_orc_${SCALE} 71 | for t in ${TABLES} 72 | do 73 | echo "Optimizing table $t ($i/$total)." 
74 | COMMAND="hive -i settings/load-flat.sql -f ddl-tpch/bin_flat/${t}.sql \ 75 | -d DB=${DATABASE} \ 76 | -d SOURCE=tpch_text_${SCALE} -d BUCKETS=${BUCKETS} \ 77 | -d FILE=orc" 78 | runcommand "$COMMAND" 79 | if [ $? -ne 0 ]; then 80 | echo "Command failed, try 'export DEBUG_SCRIPT=ON' and re-running" 81 | exit 1 82 | fi 83 | i=`expr $i + 1` 84 | done 85 | 86 | echo "Data loaded into database ${DATABASE}." 87 | --------------------------------------------------------------------------------