├── .gitignore ├── LICENSE ├── README.md ├── README_ZH.md ├── flink-tpcds ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── ververica │ │ │ └── flink │ │ │ └── benchmark │ │ │ ├── AnalyzeTableRunner.java │ │ │ ├── Benchmark.java │ │ │ ├── QueryUtil.java │ │ │ └── Runner.java │ └── resources │ │ └── queries │ │ ├── q1.sql │ │ ├── q10.sql │ │ ├── q11.sql │ │ ├── q12.sql │ │ ├── q13.sql │ │ ├── q14a.sql │ │ ├── q14b.sql │ │ ├── q15.sql │ │ ├── q16.sql │ │ ├── q17.sql │ │ ├── q18.sql │ │ ├── q19.sql │ │ ├── q2.sql │ │ ├── q20.sql │ │ ├── q21.sql │ │ ├── q22.sql │ │ ├── q23a.sql │ │ ├── q23b.sql │ │ ├── q24.sql │ │ ├── q25.sql │ │ ├── q26.sql │ │ ├── q27.sql │ │ ├── q28.sql │ │ ├── q29.sql │ │ ├── q3.sql │ │ ├── q30.sql │ │ ├── q31.sql │ │ ├── q32.sql │ │ ├── q33.sql │ │ ├── q34.sql │ │ ├── q35.sql │ │ ├── q36.sql │ │ ├── q37.sql │ │ ├── q38.sql │ │ ├── q39a.sql │ │ ├── q39b.sql │ │ ├── q4.sql │ │ ├── q40.sql │ │ ├── q41.sql │ │ ├── q42.sql │ │ ├── q43.sql │ │ ├── q44.sql │ │ ├── q45.sql │ │ ├── q46.sql │ │ ├── q47.sql │ │ ├── q48.sql │ │ ├── q49.sql │ │ ├── q5.sql │ │ ├── q50.sql │ │ ├── q51.sql │ │ ├── q52.sql │ │ ├── q53.sql │ │ ├── q54.sql │ │ ├── q55.sql │ │ ├── q56.sql │ │ ├── q57.sql │ │ ├── q58.sql │ │ ├── q59.sql │ │ ├── q6.sql │ │ ├── q60.sql │ │ ├── q61.sql │ │ ├── q62.sql │ │ ├── q63.sql │ │ ├── q64.sql │ │ ├── q65.sql │ │ ├── q66.sql │ │ ├── q67.sql │ │ ├── q68.sql │ │ ├── q69.sql │ │ ├── q7.sql │ │ ├── q70.sql │ │ ├── q71.sql │ │ ├── q72.sql │ │ ├── q73.sql │ │ ├── q74.sql │ │ ├── q75.sql │ │ ├── q76.sql │ │ ├── q77.sql │ │ ├── q78.sql │ │ ├── q79.sql │ │ ├── q8.sql │ │ ├── q80.sql │ │ ├── q81.sql │ │ ├── q82.sql │ │ ├── q83.sql │ │ ├── q84.sql │ │ ├── q85.sql │ │ ├── q86.sql │ │ ├── q87.sql │ │ ├── q88.sql │ │ ├── q89.sql │ │ ├── q9.sql │ │ ├── q90.sql │ │ ├── q91.sql │ │ ├── q92.sql │ │ ├── q93.sql │ │ ├── q94.sql │ │ ├── q95.sql │ │ ├── q96.sql │ │ ├── q97.sql │ │ ├── q98.sql │ │ └── q99.sql │ └── test │ └── java │ └── com │ └── ververica 
│ └── flink │ └── benchmark │ └── QueryUtilTest.java ├── hive-tpcds-setup ├── Makefile ├── ddl-tpcds │ ├── bin │ │ ├── add_constraints.sql │ │ ├── alltables.sql │ │ ├── analyze.sql │ │ ├── call_center.sql │ │ ├── catalog_page.sql │ │ ├── catalog_returns.sql │ │ ├── catalog_sales.sql │ │ ├── customer.sql │ │ ├── customer_address.sql │ │ ├── customer_demographics.sql │ │ ├── date_dim.sql │ │ ├── household_demographics.sql │ │ ├── income_band.sql │ │ ├── inventory.sql │ │ ├── item.sql │ │ ├── promotion.sql │ │ ├── reason.sql │ │ ├── ship_mode.sql │ │ ├── store.sql │ │ ├── store_returns.sql │ │ ├── store_sales.sql │ │ ├── time_dim.sql │ │ ├── warehouse.sql │ │ ├── web_page.sql │ │ ├── web_returns.sql │ │ ├── web_sales.sql │ │ └── web_site.sql │ ├── bin_partitioned │ │ ├── alltables.sql │ │ ├── call_center.sql │ │ ├── catalog_page.sql │ │ ├── catalog_returns.sql │ │ ├── catalog_sales.sql │ │ ├── create_alltables.sql │ │ ├── customer.sql │ │ ├── customer_address.sql │ │ ├── customer_demographics.sql │ │ ├── date_dim.sql │ │ ├── household_demographics.sql │ │ ├── income_band.sql │ │ ├── inventory.sql │ │ ├── item.sql │ │ ├── promotion.sql │ │ ├── reason.sql │ │ ├── ship_mode.sql │ │ ├── store.sql │ │ ├── store_returns.sql │ │ ├── store_sales.sql │ │ ├── time_dim.sql │ │ ├── warehouse.sql │ │ ├── web_page.sql │ │ ├── web_returns.sql │ │ ├── web_sales.sql │ │ └── web_site.sql │ └── text │ │ └── alltables.sql ├── patches │ ├── Darwin │ │ ├── macosx.2.patch │ │ └── macosx.patch │ └── all │ │ ├── tpcds-buffered.patch │ │ ├── tpcds-strcpy.patch │ │ └── tpcds_misspelled_header_guard.patch ├── pom.xml ├── src │ └── main │ │ └── java │ │ └── com │ │ └── ververica │ │ └── benchmark │ │ └── GenTable.java ├── tpcds-build.sh ├── tpcds-setup.sh └── tpcds-setup_partitioned.sh ├── pom.xml └── tools ├── common ├── env.sh └── flink-conf.yaml ├── datagen ├── init_db_for_none_partition_tables.sh └── init_db_for_partition_tables.sh ├── flink └── run_query.sh └── stats └── 
analyze_table_stats.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | 25 | .cache 26 | scalastyle-output.xml 27 | .classpath 28 | .idea 29 | .metadata 30 | .settings 31 | .project 32 | .version.properties 33 | filter.properties 34 | logs.zip 35 | target 36 | tmp 37 | *.iml 38 | *.swp 39 | *.pyc 40 | .DS_Store 41 | atlassian-ide-plugin.xml 42 | out/ 43 | *.ipr 44 | *.iws 45 | load_*_*.mk -------------------------------------------------------------------------------- /flink-tpcds/src/main/java/com/ververica/flink/benchmark/QueryUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.ververica.flink.benchmark; 19 | 20 | import java.io.BufferedInputStream; 21 | import java.io.ByteArrayOutputStream; 22 | import java.io.File; 23 | import java.io.FileInputStream; 24 | import java.io.FileNotFoundException; 25 | import java.io.IOException; 26 | import java.io.InputStream; 27 | import java.nio.charset.StandardCharsets; 28 | import java.util.Arrays; 29 | import java.util.LinkedHashMap; 30 | import java.util.List; 31 | import java.util.function.Supplier; 32 | import java.util.stream.Stream; 33 | 34 | import static java.util.Objects.requireNonNull; 35 | 36 | class QueryUtil { 37 | 38 | static LinkedHashMap getQueries(String location, String queries) { 39 | LinkedHashMap> sql = new LinkedHashMap<>(); 40 | List queryList = queries == null ? null : Arrays.asList(queries.split(",")); 41 | if (location == null) { 42 | for (int i = 1; i < 100; i++) { 43 | String name = "q" + i + ".sql"; 44 | ClassLoader cl = Benchmark.class.getClassLoader(); 45 | String path = "queries/" + name; 46 | if (cl.getResource(path) == null) { 47 | String a = "q" + i + "a.sql"; 48 | sql.put(a, () -> cl.getResourceAsStream("queries/" + a)); 49 | String b = "q" + i + "b.sql"; 50 | sql.put(b, () -> cl.getResourceAsStream("queries/" + b)); 51 | } else { 52 | sql.put(name, () -> cl.getResourceAsStream(path)); 53 | } 54 | } 55 | } else { 56 | Stream files = queryList == null ? 
57 | Arrays.stream(requireNonNull(new File(location).listFiles())) : 58 | queryList.stream().map(file -> new File(location, file)); 59 | files.forEach(file -> sql.put(file.getName(), () -> { 60 | try { 61 | return new FileInputStream(file); 62 | } catch (FileNotFoundException e) { 63 | return null; 64 | } 65 | })); 66 | } 67 | LinkedHashMap ret = new LinkedHashMap<>(); 68 | sql.forEach((name, supplier) -> { 69 | if (queryList == null || queryList.contains(name)) { 70 | InputStream in = supplier.get(); 71 | if (in != null) { 72 | ret.put(name, streamToString(in)); 73 | } 74 | } 75 | }); 76 | return ret; 77 | } 78 | 79 | private static String streamToString(InputStream inputStream) { 80 | BufferedInputStream in = new BufferedInputStream(inputStream); 81 | ByteArrayOutputStream outStream = new ByteArrayOutputStream(); 82 | try { 83 | int c; 84 | while ((c = in.read()) != -1) { 85 | outStream.write(c); 86 | } 87 | } catch (IOException e) { 88 | throw new RuntimeException(e); 89 | } finally { 90 | try { 91 | in.close(); 92 | } catch (IOException ignored) { 93 | } 94 | } 95 | return new String(outStream.toByteArray(), StandardCharsets.UTF_8); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q1.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query1.tpl and seed 2031708268 2 | with customer_total_return as 3 | (select sr_customer_sk as ctr_customer_sk 4 | ,sr_store_sk as ctr_store_sk 5 | ,sum(sr_fee) as ctr_total_return 6 | from store_returns 7 | ,date_dim 8 | where sr_returned_date_sk = d_date_sk 9 | and d_year =2000 10 | group by sr_customer_sk 11 | ,sr_store_sk) 12 | select c_customer_id 13 | from customer_total_return ctr1 14 | ,store 15 | ,customer 16 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 17 | from customer_total_return ctr2 18 | where ctr1.ctr_store_sk = 
ctr2.ctr_store_sk) 19 | and s_store_sk = ctr1.ctr_store_sk 20 | and s_state = 'NM' 21 | and ctr1.ctr_customer_sk = c_customer_sk 22 | order by c_customer_id 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query1.tpl 26 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q10.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query10.tpl and seed 797269820 2 | select 3 | cd_gender, 4 | cd_marital_status, 5 | cd_education_status, 6 | count(*) cnt1, 7 | cd_purchase_estimate, 8 | count(*) cnt2, 9 | cd_credit_rating, 10 | count(*) cnt3, 11 | cd_dep_count, 12 | count(*) cnt4, 13 | cd_dep_employed_count, 14 | count(*) cnt5, 15 | cd_dep_college_count, 16 | count(*) cnt6 17 | from 18 | customer c,customer_address ca,customer_demographics 19 | where 20 | c.c_current_addr_sk = ca.ca_address_sk and 21 | ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and 22 | cd_demo_sk = c.c_current_cdemo_sk and 23 | exists (select * 24 | from store_sales,date_dim 25 | where c.c_customer_sk = ss_customer_sk and 26 | ss_sold_date_sk = d_date_sk and 27 | d_year = 2002 and 28 | d_moy between 4 and 4+3) and 29 | (exists (select * 30 | from web_sales,date_dim 31 | where c.c_customer_sk = ws_bill_customer_sk and 32 | ws_sold_date_sk = d_date_sk and 33 | d_year = 2002 and 34 | d_moy between 4 ANd 4+3) or 35 | exists (select * 36 | from catalog_sales,date_dim 37 | where c.c_customer_sk = cs_ship_customer_sk and 38 | cs_sold_date_sk = d_date_sk and 39 | d_year = 2002 and 40 | d_moy between 4 and 4+3)) 41 | group by cd_gender, 42 | cd_marital_status, 43 | cd_education_status, 44 | cd_purchase_estimate, 45 | cd_credit_rating, 46 | cd_dep_count, 47 | cd_dep_employed_count, 48 | cd_dep_college_count 49 | order by cd_gender, 50 | cd_marital_status, 51 | cd_education_status, 52 | 
cd_purchase_estimate, 53 | cd_credit_rating, 54 | cd_dep_count, 55 | cd_dep_employed_count, 56 | cd_dep_college_count 57 | limit 100 58 | 59 | -- end query 1 in stream 0 using template query10.tpl 60 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q11.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query11.tpl and seed 1819994127 2 | with year_total as ( 3 | select c_customer_id customer_id 4 | ,c_first_name customer_first_name 5 | ,c_last_name customer_last_name 6 | ,c_preferred_cust_flag 7 | ,c_birth_country customer_birth_country 8 | ,c_login customer_login 9 | ,c_email_address customer_email_address 10 | ,d_year dyear 11 | ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total 12 | ,'s' sale_type 13 | from customer 14 | ,store_sales 15 | ,date_dim 16 | where c_customer_sk = ss_customer_sk 17 | and ss_sold_date_sk = d_date_sk 18 | group by c_customer_id 19 | ,c_first_name 20 | ,c_last_name 21 | ,d_year 22 | ,c_preferred_cust_flag 23 | ,c_birth_country 24 | ,c_login 25 | ,c_email_address 26 | ,d_year 27 | union all 28 | select c_customer_id customer_id 29 | ,c_first_name customer_first_name 30 | ,c_last_name customer_last_name 31 | ,c_preferred_cust_flag 32 | ,c_birth_country customer_birth_country 33 | ,c_login customer_login 34 | ,c_email_address customer_email_address 35 | ,d_year dyear 36 | ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total 37 | ,'w' sale_type 38 | from customer 39 | ,web_sales 40 | ,date_dim 41 | where c_customer_sk = ws_bill_customer_sk 42 | and ws_sold_date_sk = d_date_sk 43 | group by c_customer_id 44 | ,c_first_name 45 | ,c_last_name 46 | ,c_preferred_cust_flag 47 | ,c_birth_country 48 | ,c_login 49 | ,c_email_address 50 | ,d_year 51 | ) 52 | select t_s_secyear.c_preferred_cust_flag 53 | from year_total t_s_firstyear 54 | ,year_total t_s_secyear 55 | ,year_total t_w_firstyear 
56 | ,year_total t_w_secyear 57 | where t_s_secyear.customer_id = t_s_firstyear.customer_id 58 | and t_s_firstyear.customer_id = t_w_secyear.customer_id 59 | and t_s_firstyear.customer_id = t_w_firstyear.customer_id 60 | and t_s_firstyear.sale_type = 's' 61 | and t_w_firstyear.sale_type = 'w' 62 | and t_s_secyear.sale_type = 's' 63 | and t_w_secyear.sale_type = 'w' 64 | and t_s_firstyear.dyear = 2001 65 | and t_s_secyear.dyear = 2001+1 66 | and t_w_firstyear.dyear = 2001 67 | and t_w_secyear.dyear = 2001+1 68 | and t_s_firstyear.year_total > 0 69 | and t_w_firstyear.year_total > 0 70 | and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end 71 | > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end 72 | order by t_s_secyear.c_preferred_cust_flag 73 | limit 100 74 | 75 | -- end query 1 in stream 0 using template query11.tpl 76 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q12.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query12.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,i_item_id 7 | ,sum(ws_ext_sales_price) as itemrevenue 8 | ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over 9 | (partition by i_class) as revenueratio 10 | from 11 | web_sales 12 | ,item 13 | ,date_dim 14 | where 15 | ws_item_sk = i_item_sk 16 | and i_category in ('Jewelry', 'Sports', 'Books') 17 | and ws_sold_date_sk = d_date_sk 18 | and d_date between cast('2001-01-12' as date) 19 | and (cast('2001-01-12' as date) + interval '30' day) 20 | group by 21 | i_item_id 22 | ,i_item_desc 23 | ,i_category 24 | ,i_class 25 | ,i_current_price 26 | order by 27 | i_category 28 | ,i_class 29 | ,i_item_id 30 | ,i_item_desc 31 | ,revenueratio 32 | limit 100 33 | 34 | -- 
end query 1 in stream 0 using template query12.tpl 35 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q13.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query13.tpl and seed 622697896 2 | select avg(ss_quantity) 3 | ,avg(ss_ext_sales_price) 4 | ,avg(ss_ext_wholesale_cost) 5 | ,sum(ss_ext_wholesale_cost) 6 | from store_sales 7 | ,store 8 | ,customer_demographics 9 | ,household_demographics 10 | ,customer_address 11 | ,date_dim 12 | where s_store_sk = ss_store_sk 13 | and ss_sold_date_sk = d_date_sk and d_year = 2001 14 | and((ss_hdemo_sk=hd_demo_sk 15 | and cd_demo_sk = ss_cdemo_sk 16 | and cd_marital_status = 'M' 17 | and cd_education_status = '4 yr Degree' 18 | and ss_sales_price between 100.00 and 150.00 19 | and hd_dep_count = 3 20 | )or 21 | (ss_hdemo_sk=hd_demo_sk 22 | and cd_demo_sk = ss_cdemo_sk 23 | and cd_marital_status = 'D' 24 | and cd_education_status = 'Primary' 25 | and ss_sales_price between 50.00 and 100.00 26 | and hd_dep_count = 1 27 | ) or 28 | (ss_hdemo_sk=hd_demo_sk 29 | and cd_demo_sk = ss_cdemo_sk 30 | and cd_marital_status = 'U' 31 | and cd_education_status = 'Advanced Degree' 32 | and ss_sales_price between 150.00 and 200.00 33 | and hd_dep_count = 1 34 | )) 35 | and((ss_addr_sk = ca_address_sk 36 | and ca_country = 'United States' 37 | and ca_state in ('KY', 'GA', 'NM') 38 | and ss_net_profit between 100 and 200 39 | ) or 40 | (ss_addr_sk = ca_address_sk 41 | and ca_country = 'United States' 42 | and ca_state in ('MT', 'OR', 'IN') 43 | and ss_net_profit between 150 and 300 44 | ) or 45 | (ss_addr_sk = ca_address_sk 46 | and ca_country = 'United States' 47 | and ca_state in ('WI', 'MO', 'WV') 48 | and ss_net_profit between 50 and 250 49 | )) 50 | 51 | 52 | -- end query 1 in stream 0 using template query13.tpl 53 | 
-------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q14a.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query14.tpl and seed 1819994127 2 | with cross_items as 3 | (select i_item_sk ss_item_sk 4 | from item, 5 | (select iss.i_brand_id brand_id 6 | ,iss.i_class_id class_id 7 | ,iss.i_category_id category_id 8 | from store_sales 9 | ,item iss 10 | ,date_dim d1 11 | where ss_item_sk = iss.i_item_sk 12 | and ss_sold_date_sk = d1.d_date_sk 13 | and d1.d_year between 1999 AND 1999 + 2 14 | intersect 15 | select ics.i_brand_id 16 | ,ics.i_class_id 17 | ,ics.i_category_id 18 | from catalog_sales 19 | ,item ics 20 | ,date_dim d2 21 | where cs_item_sk = ics.i_item_sk 22 | and cs_sold_date_sk = d2.d_date_sk 23 | and d2.d_year between 1999 AND 1999 + 2 24 | intersect 25 | select iws.i_brand_id 26 | ,iws.i_class_id 27 | ,iws.i_category_id 28 | from web_sales 29 | ,item iws 30 | ,date_dim d3 31 | where ws_item_sk = iws.i_item_sk 32 | and ws_sold_date_sk = d3.d_date_sk 33 | and d3.d_year between 1999 AND 1999 + 2) x 34 | where i_brand_id = brand_id 35 | and i_class_id = class_id 36 | and i_category_id = category_id 37 | ), 38 | avg_sales as 39 | (select avg(quantity*list_price) average_sales 40 | from (select ss_quantity quantity 41 | ,ss_list_price list_price 42 | from store_sales 43 | ,date_dim 44 | where ss_sold_date_sk = d_date_sk 45 | and d_year between 1999 and 2001 46 | union all 47 | select cs_quantity quantity 48 | ,cs_list_price list_price 49 | from catalog_sales 50 | ,date_dim 51 | where cs_sold_date_sk = d_date_sk 52 | and d_year between 1998 and 1998 + 2 53 | union all 54 | select ws_quantity quantity 55 | ,ws_list_price list_price 56 | from web_sales 57 | ,date_dim 58 | where ws_sold_date_sk = d_date_sk 59 | and d_year between 1998 and 1998 + 2) x) 60 | select channel, 
i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) 61 | from( 62 | select 'store' channel, i_brand_id,i_class_id 63 | ,i_category_id,sum(ss_quantity*ss_list_price) sales 64 | , count(*) number_sales 65 | from store_sales 66 | ,item 67 | ,date_dim 68 | where ss_item_sk in (select ss_item_sk from cross_items) 69 | and ss_item_sk = i_item_sk 70 | and ss_sold_date_sk = d_date_sk 71 | and d_year = 1998+2 72 | and d_moy = 11 73 | group by i_brand_id,i_class_id,i_category_id 74 | having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) 75 | union all 76 | select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales 77 | from catalog_sales 78 | ,item 79 | ,date_dim 80 | where cs_item_sk in (select ss_item_sk from cross_items) 81 | and cs_item_sk = i_item_sk 82 | and cs_sold_date_sk = d_date_sk 83 | and d_year = 1998+2 84 | and d_moy = 11 85 | group by i_brand_id,i_class_id,i_category_id 86 | having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) 87 | union all 88 | select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales 89 | from web_sales 90 | ,item 91 | ,date_dim 92 | where ws_item_sk in (select ss_item_sk from cross_items) 93 | and ws_item_sk = i_item_sk 94 | and ws_sold_date_sk = d_date_sk 95 | and d_year = 1998+2 96 | and d_moy = 11 97 | group by i_brand_id,i_class_id,i_category_id 98 | having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) 99 | ) y 100 | group by rollup (channel, i_brand_id,i_class_id,i_category_id) 101 | order by channel,i_brand_id,i_class_id,i_category_id 102 | limit 100 103 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q14b.sql: -------------------------------------------------------------------------------- 1 | with cross_items as 2 | (select i_item_sk ss_item_sk 3 | 
from item, 4 | (select iss.i_brand_id brand_id 5 | ,iss.i_class_id class_id 6 | ,iss.i_category_id category_id 7 | from store_sales 8 | ,item iss 9 | ,date_dim d1 10 | where ss_item_sk = iss.i_item_sk 11 | and ss_sold_date_sk = d1.d_date_sk 12 | and d1.d_year between 1999 AND 1999 + 2 13 | intersect 14 | select ics.i_brand_id 15 | ,ics.i_class_id 16 | ,ics.i_category_id 17 | from catalog_sales 18 | ,item ics 19 | ,date_dim d2 20 | where cs_item_sk = ics.i_item_sk 21 | and cs_sold_date_sk = d2.d_date_sk 22 | and d2.d_year between 1999 AND 1999 + 2 23 | intersect 24 | select iws.i_brand_id 25 | ,iws.i_class_id 26 | ,iws.i_category_id 27 | from web_sales 28 | ,item iws 29 | ,date_dim d3 30 | where ws_item_sk = iws.i_item_sk 31 | and ws_sold_date_sk = d3.d_date_sk 32 | and d3.d_year between 1999 AND 1999 + 2) x 33 | where i_brand_id = brand_id 34 | and i_class_id = class_id 35 | and i_category_id = category_id 36 | ), 37 | avg_sales as 38 | (select avg(quantity*list_price) average_sales 39 | from (select ss_quantity quantity 40 | ,ss_list_price list_price 41 | from store_sales 42 | ,date_dim 43 | where ss_sold_date_sk = d_date_sk 44 | and d_year between 1998 and 1998 + 2 45 | union all 46 | select cs_quantity quantity 47 | ,cs_list_price list_price 48 | from catalog_sales 49 | ,date_dim 50 | where cs_sold_date_sk = d_date_sk 51 | and d_year between 1998 and 1998 + 2 52 | union all 53 | select ws_quantity quantity 54 | ,ws_list_price list_price 55 | from web_sales 56 | ,date_dim 57 | where ws_sold_date_sk = d_date_sk 58 | and d_year between 1998 and 1998 + 2) x) 59 | select * from 60 | (select 'store' channel, i_brand_id,i_class_id,i_category_id 61 | ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales 62 | from store_sales 63 | ,item 64 | ,date_dim 65 | where ss_item_sk in (select ss_item_sk from cross_items) 66 | and ss_item_sk = i_item_sk 67 | and ss_sold_date_sk = d_date_sk 68 | and d_week_seq = (select d_week_seq 69 | from date_dim 70 | where d_year = 1998 
+ 1 71 | and d_moy = 12 72 | and d_dom = 16) 73 | group by i_brand_id,i_class_id,i_category_id 74 | having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, 75 | (select 'store' channel, i_brand_id,i_class_id 76 | ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales 77 | from store_sales 78 | ,item 79 | ,date_dim 80 | where ss_item_sk in (select ss_item_sk from cross_items) 81 | and ss_item_sk = i_item_sk 82 | and ss_sold_date_sk = d_date_sk 83 | and d_week_seq = (select d_week_seq 84 | from date_dim 85 | where d_year = 1998 86 | and d_moy = 12 87 | and d_dom = 16) 88 | group by i_brand_id,i_class_id,i_category_id 89 | having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year 90 | where this_year.i_brand_id= last_year.i_brand_id 91 | and this_year.i_class_id = last_year.i_class_id 92 | and this_year.i_category_id = last_year.i_category_id 93 | order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id 94 | limit 100 95 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q15.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query15.tpl and seed 1819994127 2 | select ca_zip 3 | ,sum(cs_sales_price) 4 | from catalog_sales 5 | ,customer 6 | ,customer_address 7 | ,date_dim 8 | where cs_bill_customer_sk = c_customer_sk 9 | and c_current_addr_sk = ca_address_sk 10 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 11 | '85392', '85460', '80348', '81792') 12 | or ca_state in ('CA','WA','GA') 13 | or cs_sales_price > 500) 14 | and cs_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip 17 | order by ca_zip 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query15.tpl 21 | 
-------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q16.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query16.tpl and seed 171719422 2 | select 3 | count(distinct cs_order_number) as `order count` 4 | ,sum(cs_ext_ship_cost) as `total shipping cost` 5 | ,sum(cs_net_profit) as `total net profit` 6 | from 7 | catalog_sales cs1 8 | ,date_dim 9 | ,customer_address 10 | ,call_center 11 | where 12 | d_date between '2001-4-01' and 13 | (cast('2001-4-01' as date) + interval '60' day) 14 | and cs1.cs_ship_date_sk = d_date_sk 15 | and cs1.cs_ship_addr_sk = ca_address_sk 16 | and ca_state = 'NY' 17 | and cs1.cs_call_center_sk = cc_call_center_sk 18 | and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', 19 | 'Daviess County' 20 | ) 21 | and exists (select * 22 | from catalog_sales cs2 23 | where cs1.cs_order_number = cs2.cs_order_number 24 | and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) 25 | and not exists(select * 26 | from catalog_returns cr1 27 | where cs1.cs_order_number = cr1.cr_order_number) 28 | order by count(distinct cs_order_number) 29 | limit 100 30 | 31 | -- end query 1 in stream 0 using template query16.tpl 32 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q17.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query17.tpl and seed 1819994127 2 | select i_item_id 3 | ,i_item_desc 4 | ,s_state 5 | ,count(ss_quantity) as store_sales_quantitycount 6 | ,avg(ss_quantity) as store_sales_quantityave 7 | ,stddev_samp(ss_quantity) as store_sales_quantitystdev 8 | ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov 9 | ,count(sr_return_quantity) as_store_returns_quantitycount 10 | ,avg(sr_return_quantity) 
as_store_returns_quantityave 11 | ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev 12 | ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov 13 | ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave 14 | ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev 15 | ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov 16 | from store_sales 17 | ,store_returns 18 | ,catalog_sales 19 | ,date_dim d1 20 | ,date_dim d2 21 | ,date_dim d3 22 | ,store 23 | ,item 24 | where d1.d_quarter_name = '2000Q1' 25 | and d1.d_date_sk = ss_sold_date_sk 26 | and i_item_sk = ss_item_sk 27 | and s_store_sk = ss_store_sk 28 | and ss_customer_sk = sr_customer_sk 29 | and ss_item_sk = sr_item_sk 30 | and ss_ticket_number = sr_ticket_number 31 | and sr_returned_date_sk = d2.d_date_sk 32 | and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') 33 | and sr_customer_sk = cs_bill_customer_sk 34 | and sr_item_sk = cs_item_sk 35 | and cs_sold_date_sk = d3.d_date_sk 36 | and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') 37 | group by i_item_id 38 | ,i_item_desc 39 | ,s_state 40 | order by i_item_id 41 | ,i_item_desc 42 | ,s_state 43 | limit 100 44 | 45 | -- end query 1 in stream 0 using template query17.tpl 46 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q18.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query18.tpl and seed 1978355063 2 | select i_item_id, 3 | ca_country, 4 | ca_state, 5 | ca_county, 6 | avg(cast(cs_quantity as decimal(12,2))) agg1, 7 | avg( cast(cs_list_price as decimal(12,2))) agg2, 8 | avg( cast(cs_coupon_amt as decimal(12,2))) agg3, 9 | avg( cast(cs_sales_price as decimal(12,2))) agg4, 10 | avg( cast(cs_net_profit as decimal(12,2))) agg5, 11 | avg( cast(c_birth_year as decimal(12,2))) 
agg6, 12 | avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 13 | from catalog_sales, customer_demographics cd1, 14 | customer_demographics cd2, customer, customer_address, date_dim, item 15 | where cs_sold_date_sk = d_date_sk and 16 | cs_item_sk = i_item_sk and 17 | cs_bill_cdemo_sk = cd1.cd_demo_sk and 18 | cs_bill_customer_sk = c_customer_sk and 19 | cd1.cd_gender = 'M' and 20 | cd1.cd_education_status = 'College' and 21 | c_current_cdemo_sk = cd2.cd_demo_sk and 22 | c_current_addr_sk = ca_address_sk and 23 | c_birth_month in (9,5,12,4,1,10) and 24 | d_year = 2001 and 25 | ca_state in ('ND','WI','AL' 26 | ,'NC','OK','MS','TN') 27 | group by rollup (i_item_id, ca_country, ca_state, ca_county) 28 | order by ca_country, 29 | ca_state, 30 | ca_county, 31 | i_item_id 32 | limit 100 33 | 34 | -- end query 1 in stream 0 using template query18.tpl 35 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q19.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query19.tpl and seed 1930872976 2 | select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item,customer,customer_address,store 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=7 8 | and d_moy=11 9 | and d_year=1999 10 | and ss_customer_sk = c_customer_sk 11 | and c_current_addr_sk = ca_address_sk 12 | and substr(ca_zip,1,5) <> substr(s_zip,1,5) 13 | and ss_store_sk = s_store_sk 14 | group by i_brand 15 | ,i_brand_id 16 | ,i_manufact_id 17 | ,i_manufact 18 | order by ext_price desc 19 | ,i_brand 20 | ,i_brand_id 21 | ,i_manufact_id 22 | ,i_manufact 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query19.tpl 26 | -------------------------------------------------------------------------------- 
/flink-tpcds/src/main/resources/queries/q2.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query2.tpl and seed 1819994127
2 | -- Per-weekday ratio of combined web + catalog sales for each week of 2001
3 | -- against the week whose d_week_seq is 53 higher (taken from d_year 2002).
4 | WITH wscs AS
5 |   (SELECT sold_date_sk
6 |          ,sales_price
7 |    FROM (SELECT ws_sold_date_sk sold_date_sk
8 |                ,ws_ext_sales_price sales_price
9 |          FROM web_sales) x
10 |    UNION ALL
11 |    (SELECT cs_sold_date_sk sold_date_sk
12 |           ,cs_ext_sales_price sales_price
13 |     FROM catalog_sales)),
14 | wswscs AS
15 |   (SELECT d_week_seq,
16 |           SUM(CASE WHEN (d_day_name='Sunday') THEN sales_price ELSE NULL END) sun_sales,
17 |           SUM(CASE WHEN (d_day_name='Monday') THEN sales_price ELSE NULL END) mon_sales,
18 |           SUM(CASE WHEN (d_day_name='Tuesday') THEN sales_price ELSE NULL END) tue_sales,
19 |           SUM(CASE WHEN (d_day_name='Wednesday') THEN sales_price ELSE NULL END) wed_sales,
20 |           SUM(CASE WHEN (d_day_name='Thursday') THEN sales_price ELSE NULL END) thu_sales,
21 |           SUM(CASE WHEN (d_day_name='Friday') THEN sales_price ELSE NULL END) fri_sales,
22 |           SUM(CASE WHEN (d_day_name='Saturday') THEN sales_price ELSE NULL END) sat_sales
23 |    FROM wscs
24 |        ,date_dim
25 |    WHERE d_date_sk = sold_date_sk
26 |    GROUP BY d_week_seq)
27 | SELECT d_week_seq1
28 |       ,ROUND(sun_sales1/sun_sales2,2)
29 |       ,ROUND(mon_sales1/mon_sales2,2)
30 |       ,ROUND(tue_sales1/tue_sales2,2)
31 |       ,ROUND(wed_sales1/wed_sales2,2)
32 |       ,ROUND(thu_sales1/thu_sales2,2)
33 |       ,ROUND(fri_sales1/fri_sales2,2)
34 |       ,ROUND(sat_sales1/sat_sales2,2)
35 | FROM
36 |   (SELECT wswscs.d_week_seq d_week_seq1
37 |          ,sun_sales sun_sales1
38 |          ,mon_sales mon_sales1
39 |          ,tue_sales tue_sales1
40 |          ,wed_sales wed_sales1
41 |          ,thu_sales thu_sales1
42 |          ,fri_sales fri_sales1
43 |          ,sat_sales sat_sales1
44 |    FROM wswscs,date_dim
45 |    WHERE date_dim.d_week_seq = wswscs.d_week_seq AND
46 |          d_year = 2001) y,
47 |   (SELECT wswscs.d_week_seq d_week_seq2
48 |          ,sun_sales sun_sales2
49 |          ,mon_sales mon_sales2
50 |          ,tue_sales tue_sales2
51 |          ,wed_sales wed_sales2
52 |          ,thu_sales thu_sales2
53 |          ,fri_sales fri_sales2
54 |          ,sat_sales sat_sales2
55 |    FROM wswscs
56 |        ,date_dim
57 |    WHERE date_dim.d_week_seq = wswscs.d_week_seq AND
58 |          d_year = 2001+1) z
59 | WHERE d_week_seq1=d_week_seq2-53
60 | ORDER BY d_week_seq1
61 |
62 | -- end query 1 in stream 0 using template query2.tpl
63 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q20.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query20.tpl and seed 345591136
2 | -- Catalog revenue per item (categories Jewelry, Sports, Books) for the 30 days
3 | -- starting 2001-01-12, plus each item's percentage share of its class revenue.
4 | SELECT i_item_desc
5 |       ,i_category
6 |       ,i_class
7 |       ,i_current_price
8 |       ,SUM(cs_ext_sales_price) AS itemrevenue
9 |       ,SUM(cs_ext_sales_price)*100/SUM(SUM(cs_ext_sales_price)) OVER
10 |           (PARTITION BY i_class) AS revenueratio
11 | FROM catalog_sales
12 |     ,item
13 |     ,date_dim
14 | WHERE cs_item_sk = i_item_sk
15 |   AND i_category IN ('Jewelry', 'Sports', 'Books')
16 |   AND cs_sold_date_sk = d_date_sk
17 |   AND d_date BETWEEN CAST('2001-01-12' AS DATE)
18 |                  AND (CAST('2001-01-12' AS DATE) + INTERVAL '30' DAY)
19 | GROUP BY i_item_id
20 |         ,i_item_desc
21 |         ,i_category
22 |         ,i_class
23 |         ,i_current_price
24 | ORDER BY i_category
25 |         ,i_class
26 |         ,i_item_id
27 |         ,i_item_desc
28 |         ,revenueratio
29 | LIMIT 100
30 |
31 | -- end query 1 in stream 0 using template query20.tpl
32 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q21.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query21.tpl and seed 1819994127
2 | -- Inventory on hand per warehouse and item (price 0.99-1.49), 30 days before vs.
3 | -- from 1998-04-08 onward; keeps rows whose after/before ratio is in [2/3, 3/2].
4 | SELECT *
5 | FROM (SELECT w_warehouse_name
6 |             ,i_item_id
7 |             ,SUM(CASE WHEN (CAST(d_date AS DATE) < CAST ('1998-04-08' AS DATE))
8 |                       THEN inv_quantity_on_hand
9 |                       ELSE 0 END) AS inv_before
10 |             ,SUM(CASE WHEN (CAST(d_date AS DATE) >= CAST ('1998-04-08' AS DATE))
11 |                       THEN inv_quantity_on_hand
12 |                       ELSE 0 END) AS inv_after
13 |       FROM inventory
14 |           ,warehouse
15 |           ,item
16 |           ,date_dim
17 |       WHERE i_current_price BETWEEN 0.99 AND 1.49
18 |         AND i_item_sk = inv_item_sk
19 |         AND inv_warehouse_sk = w_warehouse_sk
20 |         AND inv_date_sk = d_date_sk
21 |         AND d_date BETWEEN (CAST ('1998-04-08' AS DATE) - INTERVAL '30' DAY)
22 |                        AND (CAST ('1998-04-08' AS DATE) + INTERVAL '30' DAY)
23 |       GROUP BY w_warehouse_name, i_item_id) x
24 | WHERE (CASE WHEN inv_before > 0
25 |             THEN inv_after / inv_before
26 |             ELSE NULL
27 |        END) BETWEEN 2.0/3.0 AND 3.0/2.0
28 | ORDER BY w_warehouse_name
29 |         ,i_item_id
30 | LIMIT 100
31 |
32 | -- end query 1 in stream 0 using template query21.tpl
33 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q22.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query22.tpl and seed 1819994127
2 | -- Average inventory quantity on hand for d_month_seq 1212 through 1212 + 11,
3 | -- rolled up over product name, brand, class and category.
4 | SELECT i_product_name
5 |       ,i_brand
6 |       ,i_class
7 |       ,i_category
8 |       ,AVG(inv_quantity_on_hand) qoh
9 | FROM inventory
10 |     ,date_dim
11 |     ,item
12 |     ,warehouse
13 | WHERE inv_date_sk=d_date_sk
14 |   AND inv_item_sk=i_item_sk
15 |   AND inv_warehouse_sk = w_warehouse_sk
16 |   AND d_month_seq BETWEEN 1212 AND 1212 + 11
17 | GROUP BY ROLLUP(i_product_name
18 |                ,i_brand
19 |                ,i_class
20 |                ,i_category)
21 | ORDER BY qoh, i_product_name, i_brand, i_class, i_category
22 | LIMIT 100
23 |
24 | -- end query 1 in stream 0 using template query22.tpl
25 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q23a.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query23.tpl and seed 2031708268
2 | -- Total January-1999 catalog + web sales (quantity * list price) of items sold in
3 | -- stores more than 4 times on one date, bought by the top store-spend customers.
4 | WITH frequent_ss_items AS
5 |   (SELECT SUBSTR(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,COUNT(*) cnt
6 |    FROM store_sales
7 |        ,date_dim
8 |        ,item
9 |    WHERE ss_sold_date_sk = d_date_sk
10 |      AND ss_item_sk = i_item_sk
11 |      AND d_year IN (1999,1999+1,1999+2,1999+3)
12 |    GROUP BY SUBSTR(i_item_desc,1,30),i_item_sk,d_date
13 |    HAVING COUNT(*) >4),
14 | max_store_sales AS
15 |   (SELECT MAX(csales) tpcds_cmax
16 |    FROM (SELECT c_customer_sk,SUM(ss_quantity*ss_sales_price) csales
17 |          FROM store_sales
18 |              ,customer
19 |              ,date_dim
20 |          WHERE ss_customer_sk = c_customer_sk
21 |            AND ss_sold_date_sk = d_date_sk
22 |            AND d_year IN (1999,1999+1,1999+2,1999+3)
23 |          GROUP BY c_customer_sk) x),
24 | best_ss_customer AS
25 |   (SELECT c_customer_sk,SUM(ss_quantity*ss_sales_price) ssales
26 |    FROM store_sales
27 |        ,customer
28 |    WHERE ss_customer_sk = c_customer_sk
29 |    GROUP BY c_customer_sk
30 |    HAVING SUM(ss_quantity*ss_sales_price) > (95/100.0) * (SELECT
31 |      *
32 |    FROM
33 |      max_store_sales))
34 | SELECT SUM(sales)
35 | FROM ((SELECT cs_quantity*cs_list_price sales
36 |        FROM catalog_sales
37 |            ,date_dim
38 |        WHERE d_year = 1999
39 |          AND d_moy = 1
40 |          AND cs_sold_date_sk = d_date_sk
41 |          AND cs_item_sk IN (SELECT item_sk FROM frequent_ss_items)
42 |          AND cs_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer))
43 |       UNION ALL
44 |       (SELECT ws_quantity*ws_list_price sales
45 |        FROM web_sales
46 |            ,date_dim
47 |        WHERE d_year = 1999
48 |          AND d_moy = 1
49 |          AND ws_sold_date_sk = d_date_sk
50 |          AND ws_item_sk IN (SELECT item_sk FROM frequent_ss_items)
51 |          AND ws_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer))) y
52 | LIMIT 100
53 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q23b.sql:
--------------------------------------------------------------------------------
1 | -- Same item and customer filters as q23a, but reports January-1999 catalog and web
2 | -- sales totals per customer last/first name instead of one grand total.
3 | WITH frequent_ss_items AS
4 |   (SELECT SUBSTR(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,COUNT(*) cnt
5 |    FROM store_sales
6 |        ,date_dim
7 |        ,item
8 |    WHERE ss_sold_date_sk = d_date_sk
9 |      AND ss_item_sk = i_item_sk
10 |      AND d_year IN (1999,1999 + 1,1999 + 2,1999 + 3)
11 |    GROUP BY SUBSTR(i_item_desc,1,30),i_item_sk,d_date
12 |    HAVING COUNT(*) >4),
13 | max_store_sales AS
14 |   (SELECT MAX(csales) tpcds_cmax
15 |    FROM (SELECT c_customer_sk,SUM(ss_quantity*ss_sales_price) csales
16 |          FROM store_sales
17 |              ,customer
18 |              ,date_dim
19 |          WHERE ss_customer_sk = c_customer_sk
20 |            AND ss_sold_date_sk = d_date_sk
21 |            AND d_year IN (1999,1999+1,1999+2,1999+3)
22 |          GROUP BY c_customer_sk) x),
23 | best_ss_customer AS
24 |   (SELECT c_customer_sk,SUM(ss_quantity*ss_sales_price) ssales
25 |    FROM store_sales
26 |        ,customer
27 |    WHERE ss_customer_sk = c_customer_sk
28 |    GROUP BY c_customer_sk
29 |    HAVING SUM(ss_quantity*ss_sales_price) > (95/100.0) * (SELECT
30 |      *
31 |    FROM max_store_sales))
32 | SELECT c_last_name,c_first_name,sales
33 | FROM ((SELECT c_last_name,c_first_name,SUM(cs_quantity*cs_list_price) sales
34 |        FROM catalog_sales
35 |            ,customer
36 |            ,date_dim
37 |        WHERE d_year = 1999
38 |          AND d_moy = 1
39 |          AND cs_sold_date_sk = d_date_sk
40 |          AND cs_item_sk IN (SELECT item_sk FROM frequent_ss_items)
41 |          AND cs_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer)
42 |          AND cs_bill_customer_sk = c_customer_sk
43 |        GROUP BY c_last_name,c_first_name)
44 |       UNION ALL
45 |       (SELECT c_last_name,c_first_name,SUM(ws_quantity*ws_list_price) sales
46 |        FROM web_sales
47 |            ,customer
48 |            ,date_dim
49 |        WHERE d_year = 1999
50 |          AND d_moy = 1
51 |          AND ws_sold_date_sk = d_date_sk
52 |          AND ws_item_sk IN (SELECT item_sk FROM frequent_ss_items)
53 |          AND ws_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer)
54 |          AND ws_bill_customer_sk = c_customer_sk
55 |        GROUP BY c_last_name,c_first_name)) y
56 | ORDER BY c_last_name,c_first_name,sales
57 | LIMIT 100
58 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q24.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query24.tpl and seed 1220860970
2 | -- Returned store sales at s_market_id 7 stores: per customer/store total for
3 | -- 'orchid' items where that total exceeds 5% of the average netpaid in ssales.
4 | WITH ssales AS
5 |   (SELECT c_last_name
6 |          ,c_first_name
7 |          ,s_store_name
8 |          ,ca_state
9 |          ,s_state
10 |          ,i_color
11 |          ,i_current_price
12 |          ,i_manager_id
13 |          ,i_units
14 |          ,i_size
15 |          ,SUM(ss_sales_price) netpaid
16 |    FROM store_sales
17 |        ,store_returns
18 |        ,store
19 |        ,item
20 |        ,customer
21 |        ,customer_address
22 |    WHERE ss_ticket_number = sr_ticket_number
23 |      AND ss_item_sk = sr_item_sk
24 |      AND ss_customer_sk = c_customer_sk
25 |      AND ss_item_sk = i_item_sk
26 |      AND ss_store_sk = s_store_sk
27 |      AND c_current_addr_sk = ca_address_sk
28 |      AND c_birth_country <> UPPER(ca_country)
29 |      AND s_zip = ca_zip
30 |      AND s_market_id=7
31 |    GROUP BY c_last_name
32 |            ,c_first_name
33 |            ,s_store_name
34 |            ,ca_state
35 |            ,s_state
36 |            ,i_color
37 |            ,i_current_price
38 |            ,i_manager_id
39 |            ,i_units
40 |            ,i_size)
41 | SELECT c_last_name
42 |       ,c_first_name
43 |       ,s_store_name
44 |       ,SUM(netpaid) paid
45 | FROM ssales
46 | WHERE i_color = 'orchid'
47 | GROUP BY c_last_name
48 |         ,c_first_name
49 |         ,s_store_name
50 | HAVING SUM(netpaid) > (SELECT 0.05*AVG(netpaid)
51 |                        FROM ssales)
52 |
53 |
54 | -- end query 1 in stream 0 using template query24.tpl
55 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q25.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query25.tpl and seed 1819994127
2 | -- Per item and store: store net profit, return net loss and catalog net profit for
3 | -- items sold in April 2000, returned and catalog-bought in months 4-10 of 2000.
4 | SELECT
5 |   i_item_id
6 |  ,i_item_desc
7 |  ,s_store_id
8 |  ,s_store_name
9 |  ,SUM(ss_net_profit) AS store_sales_profit
10 |  ,SUM(sr_net_loss) AS store_returns_loss
11 |  ,SUM(cs_net_profit) AS catalog_sales_profit
12 | FROM
13 |   store_sales
14 |  ,store_returns
15 |  ,catalog_sales
16 |  ,date_dim d1
17 |  ,date_dim d2
18 |  ,date_dim d3
19 |  ,store
20 |  ,item
21 | WHERE
22 |   d1.d_moy = 4
23 |   AND d1.d_year = 2000
24 |   AND d1.d_date_sk = ss_sold_date_sk
25 |   AND i_item_sk = ss_item_sk
26 |   AND s_store_sk = ss_store_sk
27 |   AND ss_customer_sk = sr_customer_sk
28 |   AND ss_item_sk = sr_item_sk
29 |   AND ss_ticket_number = sr_ticket_number
30 |   AND sr_returned_date_sk = d2.d_date_sk
31 |   AND d2.d_moy BETWEEN 4 AND 10
32 |   AND d2.d_year = 2000
33 |   AND sr_customer_sk = cs_bill_customer_sk
34 |   AND sr_item_sk = cs_item_sk
35 |   AND cs_sold_date_sk = d3.d_date_sk
36 |   AND d3.d_moy BETWEEN 4 AND 10
37 |   AND d3.d_year = 2000
38 | GROUP BY
39 |   i_item_id
40 |  ,i_item_desc
41 |  ,s_store_id
42 |  ,s_store_name
43 | ORDER BY
44 |   i_item_id
45 |  ,i_item_desc
46 |  ,s_store_id
47 |  ,s_store_name
48 | LIMIT 100
49 |
50 | -- end query 1 in stream 0 using template query25.tpl
51 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q26.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query26.tpl and seed 1930872976
2 | -- Per-item averages of quantity, list price, coupon amount and sales price for
3 | -- 1998 catalog sales, filtered by customer demographics and promotion channels.
4 | SELECT i_item_id,
5 |        AVG(cs_quantity) agg1,
6 |        AVG(cs_list_price) agg2,
7 |        AVG(cs_coupon_amt) agg3,
8 |        AVG(cs_sales_price) agg4
9 | FROM catalog_sales, customer_demographics, date_dim, item, promotion
10 | WHERE cs_sold_date_sk = d_date_sk AND
11 |       cs_item_sk = i_item_sk AND
12 |       cs_bill_cdemo_sk = cd_demo_sk AND
13 |       cs_promo_sk = p_promo_sk AND
14 |       cd_gender = 'F' AND
15 |       cd_marital_status = 'W' AND
16 |       cd_education_status = 'Primary' AND
17 |       (p_channel_email = 'N' OR p_channel_event = 'N') AND
18 |       d_year = 1998
19 | GROUP BY i_item_id
20 | ORDER BY i_item_id
21 | LIMIT 100
22 |
23 | -- end query 1 in stream 0 using template query26.tpl
24 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q27.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query27.tpl and seed 2017787633
2 | -- Averages of quantity, list price, coupon amount and sales price for 2001 store
3 | -- sales in six states, rolled up by item and state for one demographic slice.
4 | SELECT i_item_id,
5 |        s_state, GROUPING(s_state) g_state,
6 |        AVG(ss_quantity) agg1,
7 |        AVG(ss_list_price) agg2,
8 |        AVG(ss_coupon_amt) agg3,
9 |        AVG(ss_sales_price) agg4
10 | FROM store_sales, customer_demographics, date_dim, store, item
11 | WHERE ss_sold_date_sk = d_date_sk AND
12 |       ss_item_sk = i_item_sk AND
13 |       ss_store_sk = s_store_sk AND
14 |       ss_cdemo_sk = cd_demo_sk AND
15 |       cd_gender = 'M' AND
16 |       cd_marital_status = 'U' AND
17 |       cd_education_status = '2 yr Degree' AND
18 |       d_year = 2001 AND
19 |       s_state IN ('SD','FL', 'MI', 'LA', 'MO', 'SC')
20 | GROUP BY ROLLUP (i_item_id, s_state)
21 | ORDER BY i_item_id
22 |         ,s_state
23 | LIMIT 100
24 |
25 | -- end query 1 in stream 0 using template query27.tpl
26 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q28.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query28.tpl and seed 444293455
2 | -- Six ss_quantity buckets (0-5 ... 26-30): average, count and distinct count of
3 | -- list price where list price, coupon amount or wholesale cost is in a given range.
4 | SELECT *
5 | FROM (SELECT AVG(ss_list_price) B1_LP
6 |             ,COUNT(ss_list_price) B1_CNT
7 |             ,COUNT(DISTINCT ss_list_price) B1_CNTD
8 |       FROM store_sales
9 |       WHERE ss_quantity BETWEEN 0 AND 5
10 |         AND (ss_list_price BETWEEN 11 AND 11+10
11 |           OR ss_coupon_amt BETWEEN 460 AND 460+1000
12 |           OR ss_wholesale_cost BETWEEN 14 AND 14+20)) B1,
13 |      (SELECT AVG(ss_list_price) B2_LP
14 |             ,COUNT(ss_list_price) B2_CNT
15 |             ,COUNT(DISTINCT ss_list_price) B2_CNTD
16 |       FROM store_sales
17 |       WHERE ss_quantity BETWEEN 6 AND 10
18 |         AND (ss_list_price BETWEEN 91 AND 91+10
19 |           OR ss_coupon_amt BETWEEN 1430 AND 1430+1000
20 |           OR ss_wholesale_cost BETWEEN 32 AND 32+20)) B2,
21 |      (SELECT AVG(ss_list_price) B3_LP
22 |             ,COUNT(ss_list_price) B3_CNT
23 |             ,COUNT(DISTINCT ss_list_price) B3_CNTD
24 |       FROM store_sales
25 |       WHERE ss_quantity BETWEEN 11 AND 15
26 |         AND (ss_list_price BETWEEN 66 AND 66+10
27 |           OR ss_coupon_amt BETWEEN 920 AND 920+1000
28 |           OR ss_wholesale_cost BETWEEN 4 AND 4+20)) B3,
29 |      (SELECT AVG(ss_list_price) B4_LP
30 |             ,COUNT(ss_list_price) B4_CNT
31 |             ,COUNT(DISTINCT ss_list_price) B4_CNTD
32 |       FROM store_sales
33 |       WHERE ss_quantity BETWEEN 16 AND 20
34 |         AND (ss_list_price BETWEEN 142 AND 142+10
35 |           OR ss_coupon_amt BETWEEN 3054 AND 3054+1000
36 |           OR ss_wholesale_cost BETWEEN 80 AND 80+20)) B4,
37 |      (SELECT AVG(ss_list_price) B5_LP
38 |             ,COUNT(ss_list_price) B5_CNT
39 |             ,COUNT(DISTINCT ss_list_price) B5_CNTD
40 |       FROM store_sales
41 |       WHERE ss_quantity BETWEEN 21 AND 25
42 |         AND (ss_list_price BETWEEN 135 AND 135+10
43 |           OR ss_coupon_amt BETWEEN 14180 AND 14180+1000
44 |           OR ss_wholesale_cost BETWEEN 38 AND 38+20)) B5,
45 |      (SELECT AVG(ss_list_price) B6_LP
46 |             ,COUNT(ss_list_price) B6_CNT
47 |             ,COUNT(DISTINCT ss_list_price) B6_CNTD
48 |       FROM store_sales
49 |       WHERE ss_quantity BETWEEN 26 AND 30
50 |         AND (ss_list_price BETWEEN 28 AND 28+10
51 |           OR ss_coupon_amt BETWEEN 2513 AND 2513+1000
52 |           OR ss_wholesale_cost BETWEEN 42 AND 42+20)) B6
53 | LIMIT 100
54 |
55 | -- end query 1 in stream 0 using template query28.tpl
56 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q29.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query29.tpl and seed 2031708268
2 | -- Per item and store: quantities sold in April 1999, returned in months 4-7 of
3 | -- 1999, and bought via catalog by the returning customer during 1999-2001.
4 | SELECT
5 |   i_item_id
6 |  ,i_item_desc
7 |  ,s_store_id
8 |  ,s_store_name
9 |  ,SUM(ss_quantity) AS store_sales_quantity
10 |  ,SUM(sr_return_quantity) AS store_returns_quantity
11 |  ,SUM(cs_quantity) AS catalog_sales_quantity
12 | FROM
13 |   store_sales
14 |  ,store_returns
15 |  ,catalog_sales
16 |  ,date_dim d1
17 |  ,date_dim d2
18 |  ,date_dim d3
19 |  ,store
20 |  ,item
21 | WHERE
22 |   d1.d_moy = 4
23 |   AND d1.d_year = 1999
24 |   AND d1.d_date_sk = ss_sold_date_sk
25 |   AND i_item_sk = ss_item_sk
26 |   AND s_store_sk = ss_store_sk
27 |   AND ss_customer_sk = sr_customer_sk
28 |   AND ss_item_sk = sr_item_sk
29 |   AND ss_ticket_number = sr_ticket_number
30 |   AND sr_returned_date_sk = d2.d_date_sk
31 |   AND d2.d_moy BETWEEN 4 AND 4 + 3
32 |   AND d2.d_year = 1999
33 |   AND sr_customer_sk = cs_bill_customer_sk
34 |   AND sr_item_sk = cs_item_sk
35 |   AND cs_sold_date_sk = d3.d_date_sk
36 |   AND d3.d_year IN (1999,1999+1,1999+2)
37 | GROUP BY
38 |   i_item_id
39 |  ,i_item_desc
40 |  ,s_store_id
41 |  ,s_store_name
42 | ORDER BY
43 |   i_item_id
44 |  ,i_item_desc
45 |  ,s_store_id
46 |  ,s_store_name
47 | LIMIT 100
48 |
49 | -- end query 1 in stream 0 using template query29.tpl
50 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q3.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query3.tpl and seed 2031708268
2 | -- Sum of store ss_ext_sales_price per year and brand for items of
3 | -- i_manufact_id 436 sold in month 12.
4 | SELECT dt.d_year
5 |       ,item.i_brand_id brand_id
6 |       ,item.i_brand brand
7 |       ,SUM(ss_ext_sales_price) sum_agg
8 | FROM date_dim dt
9 |     ,store_sales
10 |     ,item
11 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
12 |   AND store_sales.ss_item_sk = item.i_item_sk
13 |   AND item.i_manufact_id = 436
14 |   AND dt.d_moy=12
15 | GROUP BY dt.d_year
16 |         ,item.i_brand
17 |         ,item.i_brand_id
18 | ORDER BY dt.d_year
19 |         ,sum_agg DESC
20 |         ,brand_id
21 | LIMIT 100
22 |
23 | -- end query 1 in stream 0 using template query3.tpl
24 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q30.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query30.tpl and seed 1819994127
2 | -- Customers living in 'IL' whose 2002 web-return total for a return state exceeds
3 | -- 1.2 times the average customer total for that same return state.
4 | WITH customer_total_return AS
5 |   (SELECT wr_returning_customer_sk AS ctr_customer_sk
6 |          ,ca_state AS ctr_state,
7 |           SUM(wr_return_amt) AS ctr_total_return
8 |    FROM web_returns
9 |        ,date_dim
10 |        ,customer_address
11 |    WHERE wr_returned_date_sk = d_date_sk
12 |      AND d_year =2002
13 |      AND wr_returning_addr_sk = ca_address_sk
14 |    GROUP BY wr_returning_customer_sk
15 |            ,ca_state)
16 | SELECT c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
17 |       ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
18 |       ,c_last_review_date,ctr_total_return
19 | FROM customer_total_return ctr1
20 |     ,customer_address
21 |     ,customer
22 | WHERE ctr1.ctr_total_return > (SELECT AVG(ctr_total_return)*1.2
23 |                                FROM customer_total_return ctr2
24 |                                WHERE ctr1.ctr_state = ctr2.ctr_state)
25 |   AND ca_address_sk = c_current_addr_sk
26 |   AND ca_state = 'IL'
27 |   AND ctr1.ctr_customer_sk = c_customer_sk
28 | ORDER BY c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
29 |         ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
30 |         ,c_last_review_date,ctr_total_return
31 | LIMIT 100
32 |
33 | -- end query 1 in stream 0 using template query30.tpl
34 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q31.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query31.tpl and seed 1819994127
2 | -- Counties where, in 2000, the web sales ratio exceeded the store sales ratio
3 | -- both from quarter 1 to quarter 2 and from quarter 2 to quarter 3.
4 | WITH ss AS
5 |   (SELECT ca_county,d_qoy, d_year,SUM(ss_ext_sales_price) AS store_sales
6 |    FROM store_sales,date_dim,customer_address
7 |    WHERE ss_sold_date_sk = d_date_sk
8 |      AND ss_addr_sk=ca_address_sk
9 |    GROUP BY ca_county,d_qoy, d_year),
10 | ws AS
11 |   (SELECT ca_county,d_qoy, d_year,SUM(ws_ext_sales_price) AS web_sales
12 |    FROM web_sales,date_dim,customer_address
13 |    WHERE ws_sold_date_sk = d_date_sk
14 |      AND ws_bill_addr_sk=ca_address_sk
15 |    GROUP BY ca_county,d_qoy, d_year)
16 | SELECT /* tt */
17 |   ss1.ca_county
18 |  ,ss1.d_year
19 |  ,ws2.web_sales/ws1.web_sales web_q1_q2_increase
20 |  ,ss2.store_sales/ss1.store_sales store_q1_q2_increase
21 |  ,ws3.web_sales/ws2.web_sales web_q2_q3_increase
22 |  ,ss3.store_sales/ss2.store_sales store_q2_q3_increase
23 | FROM
24 |   ss ss1
25 |  ,ss ss2
26 |  ,ss ss3
27 |  ,ws ws1
28 |  ,ws ws2
29 |  ,ws ws3
30 | WHERE
31 |   ss1.d_qoy = 1
32 |   AND ss1.d_year = 2000
33 |   AND ss1.ca_county = ss2.ca_county
34 |   AND ss2.d_qoy = 2
35 |   AND ss2.d_year = 2000
36 |   AND ss2.ca_county = ss3.ca_county
37 |   AND ss3.d_qoy = 3
38 |   AND ss3.d_year = 2000
39 |   AND ss1.ca_county = ws1.ca_county
40 |   AND ws1.d_qoy = 1
41 |   AND ws1.d_year = 2000
42 |   AND ws1.ca_county = ws2.ca_county
43 |   AND ws2.d_qoy = 2
44 |   AND ws2.d_year = 2000
45 |   AND ws1.ca_county = ws3.ca_county
46 |   AND ws3.d_qoy = 3
47 |   AND ws3.d_year =2000
48 |   AND CASE WHEN ws1.web_sales > 0 THEN ws2.web_sales/ws1.web_sales ELSE NULL END
49 |     > CASE WHEN ss1.store_sales > 0 THEN ss2.store_sales/ss1.store_sales ELSE NULL END
50 |   AND CASE WHEN ws2.web_sales > 0 THEN ws3.web_sales/ws2.web_sales ELSE NULL END
51 |     > CASE WHEN ss2.store_sales > 0 THEN ss3.store_sales/ss2.store_sales ELSE NULL END
52 | ORDER BY ss1.d_year
53 |
54 | -- end query 1 in stream 0 using template query31.tpl
55 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q32.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query32.tpl and seed 2031708268
2 | -- Sum of catalog discount amounts for i_manufact_id 269 in the 90 days from
3 | -- 1998-03-18 where the discount exceeds 1.3x that item's average in the window.
4 | SELECT SUM(cs_ext_discount_amt) AS `excess discount amount`
5 | FROM
6 |   catalog_sales
7 |  ,item
8 |  ,date_dim
9 | WHERE
10 |   i_manufact_id = 269
11 |   AND i_item_sk = cs_item_sk
12 |   AND d_date BETWEEN '1998-03-18' AND
13 |       (CAST('1998-03-18' AS DATE) + INTERVAL '90' DAY)
14 |   AND d_date_sk = cs_sold_date_sk
15 |   AND cs_ext_discount_amt
16 |     > (
17 |       SELECT
18 |         1.3 * AVG(cs_ext_discount_amt)
19 |       FROM
20 |         catalog_sales
21 |        ,date_dim
22 |       WHERE
23 |         cs_item_sk = i_item_sk
24 |         AND d_date BETWEEN '1998-03-18' AND
25 |             (CAST('1998-03-18' AS DATE) + INTERVAL '90' DAY)
26 |         AND d_date_sk = cs_sold_date_sk
27 |     )
28 | LIMIT 100
29 |
30 | -- end query 1 in stream 0 using template query32.tpl
31 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q33.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query33.tpl and seed 1930872976
2 | -- March-1999 ext sales price per manufacturer, summed over store, catalog and web
3 | -- sales at ca_gmt_offset -6, for manufacturers having an item in category 'Books'.
4 | WITH ss AS (
5 |   SELECT
6 |     i_manufact_id,SUM(ss_ext_sales_price) total_sales
7 |   FROM
8 |     store_sales,
9 |     date_dim,
10 |     customer_address,
11 |     item
12 |   WHERE
13 |     i_manufact_id IN (SELECT
14 |       i_manufact_id
15 |     FROM
16 |       item
17 |     WHERE i_category IN ('Books'))
18 |     AND ss_item_sk = i_item_sk
19 |     AND ss_sold_date_sk = d_date_sk
20 |     AND d_year = 1999
21 |     AND d_moy = 3
22 |     AND ss_addr_sk = ca_address_sk
23 |     AND ca_gmt_offset = -6
24 |   GROUP BY i_manufact_id),
25 | cs AS (
26 |   SELECT
27 |     i_manufact_id,SUM(cs_ext_sales_price) total_sales
28 |   FROM
29 |     catalog_sales,
30 |     date_dim,
31 |     customer_address,
32 |     item
33 |   WHERE
34 |     i_manufact_id IN (SELECT
35 |       i_manufact_id
36 |     FROM
37 |       item
38 |     WHERE i_category IN ('Books'))
39 |     AND cs_item_sk = i_item_sk
40 |     AND cs_sold_date_sk = d_date_sk
41 |     AND d_year = 1999
42 |     AND d_moy = 3
43 |     AND cs_bill_addr_sk = ca_address_sk
44 |     AND ca_gmt_offset = -6
45 |   GROUP BY i_manufact_id),
46 | ws AS (
47 |   SELECT
48 |     i_manufact_id,SUM(ws_ext_sales_price) total_sales
49 |   FROM
50 |     web_sales,
51 |     date_dim,
52 |     customer_address,
53 |     item
54 |   WHERE
55 |     i_manufact_id IN (SELECT
56 |       i_manufact_id
57 |     FROM
58 |       item
59 |     WHERE i_category IN ('Books'))
60 |     AND ws_item_sk = i_item_sk
61 |     AND ws_sold_date_sk = d_date_sk
62 |     AND d_year = 1999
63 |     AND d_moy = 3
64 |     AND ws_bill_addr_sk = ca_address_sk
65 |     AND ca_gmt_offset = -6
66 |   GROUP BY i_manufact_id)
67 | SELECT i_manufact_id ,SUM(total_sales) total_sales
68 | FROM (SELECT * FROM ss
69 |       UNION ALL
70 |       SELECT * FROM cs
71 |       UNION ALL
72 |       SELECT * FROM ws) tmp1
73 | GROUP BY i_manufact_id
74 | ORDER BY total_sales
75 | LIMIT 100
76 |
77 | -- end query 1 in stream 0 using template query33.tpl
78 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q34.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query34.tpl and seed 1971067816
2 | -- Customers with store tickets of 15-20 rows bought on days 1-3 or 25-28 of a
3 | -- month in 2000-2002, in the listed counties, for selected household profiles.
4 | SELECT c_last_name
5 |       ,c_first_name
6 |       ,c_salutation
7 |       ,c_preferred_cust_flag
8 |       ,ss_ticket_number
9 |       ,cnt FROM
10 |   (SELECT ss_ticket_number
11 |          ,ss_customer_sk
12 |          ,COUNT(*) cnt
13 |    FROM store_sales,date_dim,store,household_demographics
14 |    WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
15 |      AND store_sales.ss_store_sk = store.s_store_sk
16 |      AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
17 |      AND (date_dim.d_dom BETWEEN 1 AND 3 OR date_dim.d_dom BETWEEN 25 AND 28)
18 |      AND (household_demographics.hd_buy_potential = '>10000' OR
19 |           household_demographics.hd_buy_potential = 'unknown')
20 |      AND household_demographics.hd_vehicle_count > 0
21 |      AND (CASE WHEN household_demographics.hd_vehicle_count > 0
22 |                THEN household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count
23 |                ELSE NULL
24 |           END) > 1.2
25 |      AND date_dim.d_year IN (2000,2000+1,2000+2)
26 |      AND store.s_county IN ('Mobile County','Maverick County','Huron County','Kittitas County',
27 |                             'Fairfield County','Jackson County','Barrow County','Pennington County')
28 |    GROUP BY ss_ticket_number,ss_customer_sk) dn,customer
29 | WHERE ss_customer_sk = c_customer_sk
30 |   AND cnt BETWEEN 15 AND 20
31 | ORDER BY c_last_name,c_first_name,c_salutation,c_preferred_cust_flag DESC
32 |
33 | -- end query 1 in stream 0 using template query34.tpl
34 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q35.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query35.tpl and seed 1930872976
2 | -- Dependent-count statistics by state and demographics for customers with a store
3 | -- purchase and also a web or catalog purchase in quarters 1-3 of 1999.
4 | SELECT
5 |   ca_state,
6 |   cd_gender,
7 |   cd_marital_status,
8 |   COUNT(*) cnt1,
9 |   AVG(cd_dep_count),
10 |   MAX(cd_dep_count),
11 |   SUM(cd_dep_count),
12 |   cd_dep_employed_count,
13 |   COUNT(*) cnt2,
14 |   AVG(cd_dep_employed_count),
15 |   MAX(cd_dep_employed_count),
16 |   SUM(cd_dep_employed_count),
17 |   cd_dep_college_count,
18 |   COUNT(*) cnt3,
19 |   AVG(cd_dep_college_count),
20 |   MAX(cd_dep_college_count),
21 |   SUM(cd_dep_college_count)
22 | FROM
23 |   customer c,customer_address ca,customer_demographics
24 | WHERE
25 |   c.c_current_addr_sk = ca.ca_address_sk AND
26 |   cd_demo_sk = c.c_current_cdemo_sk AND
27 |   EXISTS (SELECT *
28 |           FROM store_sales,date_dim
29 |           WHERE c.c_customer_sk = ss_customer_sk AND
30 |                 ss_sold_date_sk = d_date_sk AND
31 |                 d_year = 1999 AND
32 |                 d_qoy < 4) AND
33 |   (EXISTS (SELECT *
34 |            FROM web_sales,date_dim
35 |            WHERE c.c_customer_sk = ws_bill_customer_sk AND
36 |                  ws_sold_date_sk = d_date_sk AND
37 |                  d_year = 1999 AND
38 |                  d_qoy < 4) OR
39 |    EXISTS (SELECT *
40 |            FROM catalog_sales,date_dim
41 |            WHERE c.c_customer_sk = cs_ship_customer_sk AND
42 |                  cs_sold_date_sk = d_date_sk AND
43 |                  d_year = 1999 AND
44 |                  d_qoy < 4))
45 | GROUP BY ca_state,
46 |          cd_gender,
47 |          cd_marital_status,
48 |          cd_dep_count,
49 |          cd_dep_employed_count,
50 |          cd_dep_college_count
51 | ORDER BY ca_state,
52 |          cd_gender,
53 |          cd_marital_status,
54 |          cd_dep_count,
55 |          cd_dep_employed_count,
56 |          cd_dep_college_count
57 | LIMIT 100
58 |
59 | -- end query 1 in stream 0 using template query35.tpl
60 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q36.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query36.tpl and seed 1544728811
2 | -- Gross margin (net profit / ext sales price) of 1999 store sales in eight states,
3 | -- rolled up by category and class and ranked within each rollup parent.
4 | SELECT
5 |   SUM(ss_net_profit)/SUM(ss_ext_sales_price) AS gross_margin
6 |  ,i_category
7 |  ,i_class
8 |  ,GROUPING(i_category)+GROUPING(i_class) AS lochierarchy
9 |  ,RANK() OVER (
10 |     PARTITION BY GROUPING(i_category)+GROUPING(i_class),
11 |     CASE WHEN GROUPING(i_class) = 0 THEN i_category END
12 |     ORDER BY SUM(ss_net_profit)/SUM(ss_ext_sales_price) ASC) AS rank_within_parent
13 | FROM
14 |   store_sales
15 |  ,date_dim d1
16 |  ,item
17 |  ,store
18 | WHERE
19 |   d1.d_year = 1999
20 |   AND d1.d_date_sk = ss_sold_date_sk
21 |   AND i_item_sk = ss_item_sk
22 |   AND s_store_sk = ss_store_sk
23 |   AND s_state IN ('SD','FL','MI','LA',
24 |                   'MO','SC','AL','GA')
25 | GROUP BY ROLLUP(i_category,i_class)
26 | ORDER BY
27 |   lochierarchy DESC
28 |  ,CASE WHEN lochierarchy = 0 THEN i_category END
29 |  ,rank_within_parent
30 | LIMIT 100
31 |
32 | -- end query 1 in stream 0 using template query36.tpl
33 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q37.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query37.tpl and seed 301843662
2 | -- Items priced 22-52 from four manufacturers that appear in catalog_sales and had
3 | -- inventory of 100-500 on hand within 60 days from 2001-06-02.
4 | SELECT i_item_id
5 |       ,i_item_desc
6 |       ,i_current_price
7 | FROM item, inventory, date_dim, catalog_sales
8 | WHERE i_current_price BETWEEN 22 AND 22 + 30
9 |   AND inv_item_sk = i_item_sk
10 |   AND d_date_sk=inv_date_sk
11 |   AND d_date BETWEEN CAST('2001-06-02' AS DATE) AND (CAST('2001-06-02' AS DATE) + INTERVAL '60' DAY)
12 |   AND i_manufact_id IN (678,964,918,849)
13 |   AND inv_quantity_on_hand BETWEEN 100 AND 500
14 |   AND cs_item_sk = i_item_sk
15 | GROUP BY i_item_id,i_item_desc,i_current_price
16 | ORDER BY i_item_id
17 | LIMIT 100
18 |
19 | -- end query 1 in stream 0 using template query37.tpl
20 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q38.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query38.tpl and seed 1819994127
2 | -- Counts distinct (last name, first name, date) combinations present in store,
3 | -- catalog and web sales alike for d_month_seq 1212 through 1212 + 11.
4 | SELECT COUNT(*) FROM (
5 |   SELECT DISTINCT c_last_name, c_first_name, d_date
6 |   FROM store_sales, date_dim, customer
7 |   WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
8 |     AND store_sales.ss_customer_sk = customer.c_customer_sk
9 |     AND d_month_seq BETWEEN 1212 AND 1212 + 11
10 |   INTERSECT
11 |   SELECT DISTINCT c_last_name, c_first_name, d_date
12 |   FROM catalog_sales, date_dim, customer
13 |   WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
14 |     AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
15 |     AND d_month_seq BETWEEN 1212 AND 1212 + 11
16 |   INTERSECT
17 |   SELECT DISTINCT c_last_name, c_first_name, d_date
18 |   FROM web_sales, date_dim, customer
19 |   WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk
20 |     AND web_sales.ws_bill_customer_sk = customer.c_customer_sk
21 |     AND d_month_seq BETWEEN 1212 AND 1212 + 11
22 | ) hot_cust
23 | LIMIT 100
24 |
25 | -- end query 1 in stream 0 using template query38.tpl
26 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q39a.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query39.tpl and seed 1327317894
2 | -- Warehouse/item pairs whose 1999 inventory stdev/mean ratio exceeds 1 in both
3 | -- month 4 and month 4+1, shown side by side.
4 | WITH inv AS
5 |   (SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
6 |          ,stdev,mean, CASE mean WHEN 0 THEN NULL ELSE stdev/mean END cov
7 |    FROM (SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
8 |                ,STDDEV_SAMP(inv_quantity_on_hand) stdev,AVG(inv_quantity_on_hand) mean
9 |          FROM inventory
10 |              ,item
11 |              ,warehouse
12 |              ,date_dim
13 |          WHERE inv_item_sk = i_item_sk
14 |            AND inv_warehouse_sk = w_warehouse_sk
15 |            AND inv_date_sk = d_date_sk
16 |            AND d_year =1999
17 |          GROUP BY w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
18 |    WHERE CASE mean WHEN 0 THEN 0 ELSE stdev/mean END > 1)
19 | SELECT inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
20 |       ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
21 | FROM inv inv1,inv inv2
22 | WHERE inv1.i_item_sk = inv2.i_item_sk
23 |   AND inv1.w_warehouse_sk = inv2.w_warehouse_sk
24 |   AND inv1.d_moy=4
25 |   AND inv2.d_moy=4+1
26 | ORDER BY inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
27 |         ,inv2.d_moy,inv2.mean, inv2.cov
28 |
29 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q39b.sql:
--------------------------------------------------------------------------------
1 | -- Same comparison as q39a, additionally requiring the month-4 stdev/mean ratio
2 | -- (inv1.cov) to exceed 1.5.
3 | WITH inv AS
4 |   (SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
5 |          ,stdev,mean, CASE mean WHEN 0 THEN NULL ELSE stdev/mean END cov
6 |    FROM (SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
7 |                ,STDDEV_SAMP(inv_quantity_on_hand) stdev,AVG(inv_quantity_on_hand) mean
8 |          FROM inventory
9 |              ,item
10 |              ,warehouse
11 |              ,date_dim
12 |          WHERE inv_item_sk = i_item_sk
13 |            AND inv_warehouse_sk = w_warehouse_sk
14 |            AND inv_date_sk = d_date_sk
15 |            AND d_year =1999
16 |          GROUP BY w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
17 |    WHERE CASE mean WHEN 0 THEN 0 ELSE stdev/mean END > 1)
18 | SELECT inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
19 |       ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
20 | FROM inv inv1,inv inv2
21 | WHERE inv1.i_item_sk = inv2.i_item_sk
22 |   AND inv1.w_warehouse_sk = inv2.w_warehouse_sk
23 |   AND inv1.d_moy=4
24 |   AND inv2.d_moy=4+1
25 |   AND inv1.cov > 1.5
26 | ORDER BY inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
27 |         ,inv2.d_moy,inv2.mean, inv2.cov
28 |
29 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q40.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query40.tpl and seed 1819994127
2 | -- Per warehouse state and item (price 0.99-1.49): catalog sales price net of
3 | -- refunded cash, 30 days before vs. from 1998-04-08 onward.
4 | SELECT
5 |   w_state
6 |  ,i_item_id
7 |  ,SUM(CASE WHEN (CAST(d_date AS DATE) < CAST ('1998-04-08' AS DATE))
8 |            THEN cs_sales_price - COALESCE(cr_refunded_cash,0) ELSE 0 END) AS sales_before
9 |  ,SUM(CASE WHEN (CAST(d_date AS DATE) >= CAST ('1998-04-08' AS DATE))
10 |            THEN cs_sales_price - COALESCE(cr_refunded_cash,0) ELSE 0 END) AS sales_after
11 | FROM
12 |   catalog_sales LEFT OUTER JOIN catalog_returns ON
13 |     (cs_order_number = cr_order_number
14 |      AND cs_item_sk = cr_item_sk)
15 |  ,warehouse
16 |  ,item
17 |  ,date_dim
18 | WHERE
19 |   i_current_price BETWEEN 0.99 AND 1.49
20 |   AND i_item_sk = cs_item_sk
21 |   AND cs_warehouse_sk = w_warehouse_sk
22 |   AND cs_sold_date_sk = d_date_sk
23 |   AND d_date BETWEEN (CAST ('1998-04-08' AS DATE) - INTERVAL '30' DAY)
24 |                  AND (CAST ('1998-04-08' AS DATE) + INTERVAL '30' DAY)
25 | GROUP BY
26 |   w_state,i_item_id
27 | ORDER BY w_state,i_item_id
28 | LIMIT 100
29 |
30 | -- end query 1 in stream 0 using template query40.tpl
31 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q41.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query41.tpl and seed 1581015815
2 | -- Distinct product names for i_manufact_id 742 through 742+40 whose i_manufact has
3 | -- at least one item matching a listed category/color/units/size combination.
4 | SELECT DISTINCT(i_product_name)
5 | FROM item i1
6 | WHERE i_manufact_id BETWEEN 742 AND 742+40
7 |   AND (SELECT COUNT(*) AS item_cnt
8 |        FROM item
9 |        WHERE (i_manufact = i1.i_manufact AND
10 |        ((i_category = 'Women' AND
11 |        (i_color = 'orchid' OR i_color = 'papaya') AND
12 |        (i_units = 'Pound' OR i_units = 'Lb') AND
13 |        (i_size = 'petite' OR i_size = 'medium')
14 |        ) OR
15 |        (i_category = 'Women' AND
16 |        (i_color = 'burlywood' OR i_color = 'navy') AND
17 |        (i_units = 'Bundle' OR i_units = 'Each') AND
18 |        (i_size = 'N/A' OR i_size = 'extra large')
19 |        ) OR
20 |        (i_category = 'Men' AND
21 |        (i_color = 'bisque' OR i_color = 'azure') AND
22 |        (i_units = 'N/A' OR i_units = 'Tsp') AND
23 |        (i_size = 'small' OR i_size = 'large')
24 |        ) OR
25 |        (i_category = 'Men' AND
26 |        (i_color = 'chocolate' OR i_color = 'cornflower') AND
27 |        (i_units = 'Bunch' OR i_units = 'Gross') AND
28 |        (i_size = 'petite' OR i_size = 'medium')
29 |        ))) OR
30 |        (i_manufact = i1.i_manufact AND
31 |        ((i_category = 'Women' AND
32 |        (i_color = 'salmon' OR i_color = 'midnight') AND
33 |        (i_units = 'Oz' OR i_units = 'Box') AND
34 |        (i_size = 'petite' OR i_size = 'medium')
35 |        ) OR
36 |        (i_category = 'Women' AND
37 |        (i_color = 'snow' OR i_color = 'steel') AND
38 |        (i_units = 'Carton' OR i_units = 'Tbl') AND
39 |        (i_size = 'N/A' OR i_size = 'extra large')
40 |        ) OR
41 |        (i_category = 'Men' AND
42 |        (i_color = 'purple' OR i_color = 'gainsboro') AND
43 |        (i_units = 'Dram' OR i_units = 'Unknown') AND
44 |        (i_size = 'small' OR i_size = 'large')
45 |        ) OR
46 |        (i_category = 'Men' AND
47 |        (i_color = 'metallic' OR i_color = 'forest') AND
48 |        (i_units = 'Gram' OR i_units = 'Ounce') AND
49 |        (i_size = 'petite' OR i_size = 'medium')
50 |        )))) > 0
51 | ORDER BY i_product_name
52 | LIMIT 100
53 |
54 | -- end query 1 in stream 0 using template query41.tpl
55 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q42.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query42.tpl and seed 1819994127
2 | -- Sum of store ss_ext_sales_price per item category for i_manager_id 1 in
3 | -- month 12 of 1998, largest totals first.
4 | SELECT dt.d_year
5 |       ,item.i_category_id
6 |       ,item.i_category
7 |       ,SUM(ss_ext_sales_price)
8 | FROM date_dim dt
9 |     ,store_sales
10 |     ,item
11 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
12 |   AND store_sales.ss_item_sk = item.i_item_sk
13 |   AND item.i_manager_id = 1
14 |   AND dt.d_moy=12
15 |   AND dt.d_year=1998
16 | GROUP BY dt.d_year
17 |         ,item.i_category_id
18 |         ,item.i_category
19 | ORDER BY SUM(ss_ext_sales_price) DESC,dt.d_year
20 |         ,item.i_category_id
21 |         ,item.i_category
22 | LIMIT 100
23 |
24 | -- end query 1 in stream 0 using template query42.tpl
25 |
--------------------------------------------------------------------------------
/flink-tpcds/src/main/resources/queries/q43.sql:
--------------------------------------------------------------------------------
1 | -- start query 1 in stream 0 using template query43.tpl and seed 1819994127
2 | select
s_store_name, s_store_id, 3 | sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, 4 | sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, 5 | sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, 6 | sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, 7 | sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, 8 | sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, 9 | sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales 10 | from date_dim, store_sales, store 11 | where d_date_sk = ss_sold_date_sk and 12 | s_store_sk = ss_store_sk and 13 | s_gmt_offset = -6 and 14 | d_year = 1998 15 | group by s_store_name, s_store_id 16 | order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales 17 | limit 100 18 | 19 | -- end query 1 in stream 0 using template query43.tpl 20 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q44.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query44.tpl and seed 1819994127 2 | select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing 3 | from(select * 4 | from (select item_sk,rank() over (order by rank_col asc) rnk 5 | from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col 6 | from store_sales ss1 7 | where ss_store_sk = 410 8 | group by ss_item_sk 9 | having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col 10 | from store_sales 11 | where ss_store_sk = 410 12 | and ss_hdemo_sk is null 13 | group by ss_store_sk))V1)V11 14 | where rnk < 11) asceding, 15 | (select * 16 | from (select item_sk,rank() over (order by rank_col desc) rnk 17 | from (select ss_item_sk item_sk,avg(ss_net_profit) 
rank_col 18 | from store_sales ss1 19 | where ss_store_sk = 410 20 | group by ss_item_sk 21 | having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col 22 | from store_sales 23 | where ss_store_sk = 410 24 | and ss_hdemo_sk is null 25 | group by ss_store_sk))V2)V21 26 | where rnk < 11) descending, 27 | item i1, 28 | item i2 29 | where asceding.rnk = descending.rnk 30 | and i1.i_item_sk=asceding.item_sk 31 | and i2.i_item_sk=descending.item_sk 32 | order by asceding.rnk 33 | limit 100 34 | 35 | -- end query 1 in stream 0 using template query44.tpl 36 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q45.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query45.tpl and seed 2031708268 2 | select ca_zip, ca_county, sum(ws_sales_price) 3 | from web_sales, customer, customer_address, date_dim, item 4 | where ws_bill_customer_sk = c_customer_sk 5 | and c_current_addr_sk = ca_address_sk 6 | and ws_item_sk = i_item_sk 7 | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') 8 | or 9 | i_item_id in (select i_item_id 10 | from item 11 | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 12 | ) 13 | ) 14 | and ws_sold_date_sk = d_date_sk 15 | and d_qoy = 2 and d_year = 2000 16 | group by ca_zip, ca_county 17 | order by ca_zip, ca_county 18 | limit 100 19 | 20 | -- end query 1 in stream 0 using template query45.tpl 21 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q46.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query46.tpl and seed 803547492 2 | select c_last_name 3 | ,c_first_name 4 | ,ca_city 5 | ,bought_city 6 | ,ss_ticket_number 7 | ,amt,profit 8 | from 9 | (select ss_ticket_number 10 | 
,ss_customer_sk 11 | ,ca_city bought_city 12 | ,sum(ss_coupon_amt) amt 13 | ,sum(ss_net_profit) profit 14 | from store_sales,date_dim,store,household_demographics,customer_address 15 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 16 | and store_sales.ss_store_sk = store.s_store_sk 17 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 18 | and store_sales.ss_addr_sk = customer_address.ca_address_sk 19 | and (household_demographics.hd_dep_count = 2 or 20 | household_demographics.hd_vehicle_count= 1) 21 | and date_dim.d_dow in (6,0) 22 | and date_dim.d_year in (1998,1998+1,1998+2) 23 | and store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park') 24 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr 25 | where ss_customer_sk = c_customer_sk 26 | and customer.c_current_addr_sk = current_addr.ca_address_sk 27 | and current_addr.ca_city <> bought_city 28 | order by c_last_name 29 | ,c_first_name 30 | ,ca_city 31 | ,bought_city 32 | ,ss_ticket_number 33 | limit 100 34 | 35 | -- end query 1 in stream 0 using template query46.tpl 36 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q47.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query47.tpl and seed 2031708268 2 | with v1 as( 3 | select i_category, i_brand, 4 | s_store_name, s_company_name, 5 | d_year, d_moy, 6 | sum(ss_sales_price) sum_sales, 7 | avg(sum(ss_sales_price)) over 8 | (partition by i_category, i_brand, 9 | s_store_name, s_company_name, d_year) 10 | avg_monthly_sales, 11 | rank() over 12 | (partition by i_category, i_brand, 13 | s_store_name, s_company_name 14 | order by d_year, d_moy) rn 15 | from item, store_sales, date_dim, store 16 | where ss_item_sk = i_item_sk and 17 | ss_sold_date_sk = d_date_sk and 18 | ss_store_sk = s_store_sk and 19 | ( 20 | d_year 
= 2000 or 21 | ( d_year = 2000-1 and d_moy =12) or 22 | ( d_year = 2000+1 and d_moy =1) 23 | ) 24 | group by i_category, i_brand, 25 | s_store_name, s_company_name, 26 | d_year, d_moy), 27 | v2 as( 28 | select v1.i_category 29 | ,v1.d_year, v1.d_moy 30 | ,v1.avg_monthly_sales 31 | ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum 32 | from v1, v1 v1_lag, v1 v1_lead 33 | where v1.i_category = v1_lag.i_category and 34 | v1.i_category = v1_lead.i_category and 35 | v1.i_brand = v1_lag.i_brand and 36 | v1.i_brand = v1_lead.i_brand and 37 | v1.s_store_name = v1_lag.s_store_name and 38 | v1.s_store_name = v1_lead.s_store_name and 39 | v1.s_company_name = v1_lag.s_company_name and 40 | v1.s_company_name = v1_lead.s_company_name and 41 | v1.rn = v1_lag.rn + 1 and 42 | v1.rn = v1_lead.rn - 1) 43 | select * 44 | from v2 45 | where d_year = 2000 and 46 | avg_monthly_sales > 0 and 47 | case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 48 | order by sum_sales - avg_monthly_sales, 3 49 | limit 100 50 | 51 | -- end query 1 in stream 0 using template query47.tpl 52 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q48.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query48.tpl and seed 622697896 2 | select sum (ss_quantity) 3 | from store_sales, store, customer_demographics, customer_address, date_dim 4 | where s_store_sk = ss_store_sk 5 | and ss_sold_date_sk = d_date_sk and d_year = 1998 6 | and 7 | ( 8 | ( 9 | cd_demo_sk = ss_cdemo_sk 10 | and 11 | cd_marital_status = 'M' 12 | and 13 | cd_education_status = '4 yr Degree' 14 | and 15 | ss_sales_price between 100.00 and 150.00 16 | ) 17 | or 18 | ( 19 | cd_demo_sk = ss_cdemo_sk 20 | and 21 | cd_marital_status = 'M' 22 | and 23 | cd_education_status = '4 yr Degree' 24 | and 25 | ss_sales_price between 
50.00 and 100.00 26 | ) 27 | or 28 | ( 29 | cd_demo_sk = ss_cdemo_sk 30 | and 31 | cd_marital_status = 'M' 32 | and 33 | cd_education_status = '4 yr Degree' 34 | and 35 | ss_sales_price between 150.00 and 200.00 36 | ) 37 | ) 38 | and 39 | ( 40 | ( 41 | ss_addr_sk = ca_address_sk 42 | and 43 | ca_country = 'United States' 44 | and 45 | ca_state in ('KY', 'GA', 'NM') 46 | and ss_net_profit between 0 and 2000 47 | ) 48 | or 49 | (ss_addr_sk = ca_address_sk 50 | and 51 | ca_country = 'United States' 52 | and 53 | ca_state in ('MT', 'OR', 'IN') 54 | and ss_net_profit between 150 and 3000 55 | ) 56 | or 57 | (ss_addr_sk = ca_address_sk 58 | and 59 | ca_country = 'United States' 60 | and 61 | ca_state in ('WI', 'MO', 'WV') 62 | and ss_net_profit between 50 and 25000 63 | ) 64 | ) 65 | 66 | 67 | -- end query 1 in stream 0 using template query48.tpl 68 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q50.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query50.tpl and seed 1819994127 2 | select 3 | s_store_name 4 | ,s_company_id 5 | ,s_street_number 6 | ,s_street_name 7 | ,s_street_type 8 | ,s_suite_number 9 | ,s_city 10 | ,s_county 11 | ,s_state 12 | ,s_zip 13 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` 14 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and 15 | (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` 16 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and 17 | (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` 18 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and 19 | (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` 20 | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` 21 | 
from 22 | store_sales 23 | ,store_returns 24 | ,store 25 | ,date_dim d1 26 | ,date_dim d2 27 | where 28 | d2.d_year = 2000 29 | and d2.d_moy = 9 30 | and ss_ticket_number = sr_ticket_number 31 | and ss_item_sk = sr_item_sk 32 | and ss_sold_date_sk = d1.d_date_sk 33 | and sr_returned_date_sk = d2.d_date_sk 34 | and ss_customer_sk = sr_customer_sk 35 | and ss_store_sk = s_store_sk 36 | group by 37 | s_store_name 38 | ,s_company_id 39 | ,s_street_number 40 | ,s_street_name 41 | ,s_street_type 42 | ,s_suite_number 43 | ,s_city 44 | ,s_county 45 | ,s_state 46 | ,s_zip 47 | order by s_store_name 48 | ,s_company_id 49 | ,s_street_number 50 | ,s_street_name 51 | ,s_street_type 52 | ,s_suite_number 53 | ,s_city 54 | ,s_county 55 | ,s_state 56 | ,s_zip 57 | limit 100 58 | 59 | -- end query 1 in stream 0 using template query50.tpl 60 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q51.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query51.tpl and seed 1819994127 2 | WITH web_v1 as ( 3 | select 4 | ws_item_sk item_sk, d_date, 5 | sum(sum(ws_sales_price)) 6 | over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales 7 | from web_sales 8 | ,date_dim 9 | where ws_sold_date_sk=d_date_sk 10 | and d_month_seq between 1212 and 1212+11 11 | and ws_item_sk is not NULL 12 | group by ws_item_sk, d_date), 13 | store_v1 as ( 14 | select 15 | ss_item_sk item_sk, d_date, 16 | sum(sum(ss_sales_price)) 17 | over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales 18 | from store_sales 19 | ,date_dim 20 | where ss_sold_date_sk=d_date_sk 21 | and d_month_seq between 1212 and 1212+11 22 | and ss_item_sk is not NULL 23 | group by ss_item_sk, d_date) 24 | select * 25 | from (select item_sk 26 | ,d_date 27 | ,web_sales 28 | ,store_sales 29 | 
,max(web_sales) 30 | over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative 31 | ,max(store_sales) 32 | over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative 33 | from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk 34 | ,case when web.d_date is not null then web.d_date else store.d_date end d_date 35 | ,web.cume_sales web_sales 36 | ,store.cume_sales store_sales 37 | from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk 38 | and web.d_date = store.d_date) 39 | )x )y 40 | where web_cumulative > store_cumulative 41 | order by item_sk 42 | ,d_date 43 | limit 100 44 | 45 | -- end query 1 in stream 0 using template query51.tpl 46 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q52.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query52.tpl and seed 1819994127 2 | select dt.d_year 3 | ,item.i_brand_id brand_id 4 | ,item.i_brand brand 5 | ,sum(ss_ext_sales_price) ext_price 6 | from date_dim dt 7 | ,store_sales 8 | ,item 9 | where dt.d_date_sk = store_sales.ss_sold_date_sk 10 | and store_sales.ss_item_sk = item.i_item_sk 11 | and item.i_manager_id = 1 12 | and dt.d_moy=12 13 | and dt.d_year=1998 14 | group by dt.d_year 15 | ,item.i_brand 16 | ,item.i_brand_id 17 | order by dt.d_year 18 | ,ext_price desc 19 | ,brand_id 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query52.tpl 23 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q53.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query53.tpl and seed 1819994127 2 | select * from 3 | (select i_manufact_id, 4 | 
sum(ss_sales_price) sum_sales, 5 | avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales 6 | from item, store_sales, date_dim, store 7 | where ss_item_sk = i_item_sk and 8 | ss_sold_date_sk = d_date_sk and 9 | ss_store_sk = s_store_sk and 10 | d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and 11 | ((i_category in ('Books','Children','Electronics') and 12 | i_class in ('personal','portable','reference','self-help') and 13 | i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', 14 | 'exportiunivamalg #9','scholaramalgamalg #9')) 15 | or(i_category in ('Women','Music','Men') and 16 | i_class in ('accessories','classical','fragrances','pants') and 17 | i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', 18 | 'importoamalg #1'))) 19 | group by i_manufact_id, d_qoy ) tmp1 20 | where case when avg_quarterly_sales > 0 21 | then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales 22 | else null end > 0.1 23 | order by avg_quarterly_sales, 24 | sum_sales, 25 | i_manufact_id 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query53.tpl 29 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q54.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query54.tpl and seed 1930872976 2 | with my_customers as ( 3 | select distinct c_customer_sk 4 | , c_current_addr_sk 5 | from 6 | ( select cs_sold_date_sk sold_date_sk, 7 | cs_bill_customer_sk customer_sk, 8 | cs_item_sk item_sk 9 | from catalog_sales 10 | union all 11 | select ws_sold_date_sk sold_date_sk, 12 | ws_bill_customer_sk customer_sk, 13 | ws_item_sk item_sk 14 | from web_sales 15 | ) cs_or_ws_sales, 16 | item, 17 | date_dim, 18 | customer 19 | where sold_date_sk = d_date_sk 20 | and item_sk = i_item_sk 21 | and i_category = 'Jewelry' 22 | and 
i_class = 'consignment' 23 | and c_customer_sk = cs_or_ws_sales.customer_sk 24 | and d_moy = 3 25 | and d_year = 1999 26 | ) 27 | , my_revenue as ( 28 | select c_customer_sk, 29 | sum(ss_ext_sales_price) as revenue 30 | from my_customers, 31 | store_sales, 32 | customer_address, 33 | store, 34 | date_dim 35 | where c_current_addr_sk = ca_address_sk 36 | and ca_county = s_county 37 | and ca_state = s_state 38 | and ss_sold_date_sk = d_date_sk 39 | and c_customer_sk = ss_customer_sk 40 | and d_month_seq between (select distinct d_month_seq+1 41 | from date_dim where d_year = 1999 and d_moy = 3) 42 | and (select distinct d_month_seq+3 43 | from date_dim where d_year = 1999 and d_moy = 3) 44 | group by c_customer_sk 45 | ) 46 | , segments as 47 | (select cast((revenue/50) as int) as segment 48 | from my_revenue 49 | ) 50 | select segment, count(*) as num_customers, segment*50 as segment_base 51 | from segments 52 | group by segment 53 | order by segment, num_customers 54 | limit 100 55 | 56 | -- end query 1 in stream 0 using template query54.tpl 57 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q55.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query55.tpl and seed 2031708268 2 | select i_brand_id brand_id, i_brand brand, 3 | sum(ss_ext_sales_price) ext_price 4 | from date_dim, store_sales, item 5 | where d_date_sk = ss_sold_date_sk 6 | and ss_item_sk = i_item_sk 7 | and i_manager_id=36 8 | and d_moy=12 9 | and d_year=2001 10 | group by i_brand, i_brand_id 11 | order by ext_price desc, i_brand_id 12 | limit 100 13 | 14 | -- end query 1 in stream 0 using template query55.tpl 15 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q56.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in 
stream 0 using template query56.tpl and seed 1951559352 2 | with ss as ( 3 | select i_item_id,sum(ss_ext_sales_price) total_sales 4 | from 5 | store_sales, 6 | date_dim, 7 | customer_address, 8 | item 9 | where i_item_id in (select 10 | i_item_id 11 | from item 12 | where i_color in ('orchid','chiffon','lace')) 13 | and ss_item_sk = i_item_sk 14 | and ss_sold_date_sk = d_date_sk 15 | and d_year = 2000 16 | and d_moy = 1 17 | and ss_addr_sk = ca_address_sk 18 | and ca_gmt_offset = -8 19 | group by i_item_id), 20 | cs as ( 21 | select i_item_id,sum(cs_ext_sales_price) total_sales 22 | from 23 | catalog_sales, 24 | date_dim, 25 | customer_address, 26 | item 27 | where 28 | i_item_id in (select 29 | i_item_id 30 | from item 31 | where i_color in ('orchid','chiffon','lace')) 32 | and cs_item_sk = i_item_sk 33 | and cs_sold_date_sk = d_date_sk 34 | and d_year = 2000 35 | and d_moy = 1 36 | and cs_bill_addr_sk = ca_address_sk 37 | and ca_gmt_offset = -8 38 | group by i_item_id), 39 | ws as ( 40 | select i_item_id,sum(ws_ext_sales_price) total_sales 41 | from 42 | web_sales, 43 | date_dim, 44 | customer_address, 45 | item 46 | where 47 | i_item_id in (select 48 | i_item_id 49 | from item 50 | where i_color in ('orchid','chiffon','lace')) 51 | and ws_item_sk = i_item_sk 52 | and ws_sold_date_sk = d_date_sk 53 | and d_year = 2000 54 | and d_moy = 1 55 | and ws_bill_addr_sk = ca_address_sk 56 | and ca_gmt_offset = -8 57 | group by i_item_id) 58 | select i_item_id ,sum(total_sales) total_sales 59 | from (select * from ss 60 | union all 61 | select * from cs 62 | union all 63 | select * from ws) tmp1 64 | group by i_item_id 65 | order by total_sales 66 | limit 100 67 | 68 | -- end query 1 in stream 0 using template query56.tpl 69 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q57.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using 
template query57.tpl and seed 2031708268 2 | with v1 as( 3 | select i_category, i_brand, 4 | cc_name, 5 | d_year, d_moy, 6 | sum(cs_sales_price) sum_sales, 7 | avg(sum(cs_sales_price)) over 8 | (partition by i_category, i_brand, 9 | cc_name, d_year) 10 | avg_monthly_sales, 11 | rank() over 12 | (partition by i_category, i_brand, 13 | cc_name 14 | order by d_year, d_moy) rn 15 | from item, catalog_sales, date_dim, call_center 16 | where cs_item_sk = i_item_sk and 17 | cs_sold_date_sk = d_date_sk and 18 | cc_call_center_sk= cs_call_center_sk and 19 | ( 20 | d_year = 2000 or 21 | ( d_year = 2000-1 and d_moy =12) or 22 | ( d_year = 2000+1 and d_moy =1) 23 | ) 24 | group by i_category, i_brand, 25 | cc_name , d_year, d_moy), 26 | v2 as( 27 | select v1.i_category, v1.i_brand 28 | ,v1.d_year, v1.d_moy 29 | ,v1.avg_monthly_sales 30 | ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum 31 | from v1, v1 v1_lag, v1 v1_lead 32 | where v1.i_category = v1_lag.i_category and 33 | v1.i_category = v1_lead.i_category and 34 | v1.i_brand = v1_lag.i_brand and 35 | v1.i_brand = v1_lead.i_brand and 36 | v1. cc_name = v1_lag. cc_name and 37 | v1. cc_name = v1_lead. 
cc_name and 38 | v1.rn = v1_lag.rn + 1 and 39 | v1.rn = v1_lead.rn - 1) 40 | select * 41 | from v2 42 | where d_year = 2000 and 43 | avg_monthly_sales > 0 and 44 | case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 45 | order by sum_sales - avg_monthly_sales, 3 46 | limit 100 47 | 48 | -- end query 1 in stream 0 using template query57.tpl 49 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q58.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query58.tpl and seed 1819994127 2 | /* Items whose store, catalog and web revenue, for dates sharing the d_week_seq of 1998-02-19, each lie between 0.9x and 1.1x of the other two channels */ with ss_items as 3 | (select i_item_id item_id 4 | ,sum(ss_ext_sales_price) ss_item_rev 5 | from store_sales 6 | ,item 7 | ,date_dim 8 | where ss_item_sk = i_item_sk 9 | and d_date in (select d_date 10 | from date_dim 11 | where d_week_seq = (select d_week_seq 12 | from date_dim 13 | where d_date = '1998-02-19')) 14 | and ss_sold_date_sk = d_date_sk 15 | group by i_item_id), 16 | cs_items as 17 | (select i_item_id item_id 18 | ,sum(cs_ext_sales_price) cs_item_rev 19 | from catalog_sales 20 | ,item 21 | ,date_dim 22 | where cs_item_sk = i_item_sk 23 | and d_date in (select d_date 24 | from date_dim 25 | where d_week_seq = (select d_week_seq 26 | from date_dim 27 | where d_date = '1998-02-19')) 28 | and cs_sold_date_sk = d_date_sk 29 | group by i_item_id), 30 | ws_items as 31 | (select i_item_id item_id 32 | ,sum(ws_ext_sales_price) ws_item_rev 33 | from web_sales 34 | ,item 35 | ,date_dim 36 | where ws_item_sk = i_item_sk 37 | and d_date in (select d_date 38 | from date_dim 39 | where d_week_seq =(select d_week_seq 40 | from date_dim 41 | where d_date = '1998-02-19')) 42 | and ws_sold_date_sk = d_date_sk 43 | group by i_item_id) 44 | select ss_items.item_id 45 | ,ss_item_rev 46 | ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev /* each *_dev column: channel revenue as a percentage of the three-channel average */ 47 | ,cs_item_rev 48 | 
,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev 49 | ,ws_item_rev 50 | ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev 51 | ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average 52 | from ss_items,cs_items,ws_items 53 | where ss_items.item_id=cs_items.item_id 54 | and ss_items.item_id=ws_items.item_id 55 | and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev 56 | and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev 57 | and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev 58 | and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev 59 | and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev 60 | and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev 61 | order by item_id 62 | ,ss_item_rev 63 | limit 100 64 | 65 | -- end query 1 in stream 0 using template query58.tpl 66 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q59.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query59.tpl and seed 1819994127 2 | with wss as 3 | (select d_week_seq, 4 | ss_store_sk, 5 | sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, 6 | sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, 7 | sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, 8 | sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, 9 | sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, 10 | sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, 11 | sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales 12 | from store_sales,date_dim 13 | where d_date_sk = ss_sold_date_sk 14 | group by d_week_seq,ss_store_sk 15 | ) 16 | select s_store_name1,s_store_id1,d_week_seq1 17 | 
,sun_sales1/sun_sales2,mon_sales1/mon_sales2 18 | ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 19 | ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 /* each ratio: one week's sales for a store over the same store's sales 52 weeks later */ 20 | from 21 | (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 22 | ,s_store_id s_store_id1,sun_sales sun_sales1 23 | ,mon_sales mon_sales1,tue_sales tue_sales1 24 | ,wed_sales wed_sales1,thu_sales thu_sales1 25 | ,fri_sales fri_sales1,sat_sales sat_sales1 26 | from wss,store,date_dim d 27 | where d.d_week_seq = wss.d_week_seq and 28 | ss_store_sk = s_store_sk and 29 | d_month_seq between 1185 and 1185 + 11) y, 30 | (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 31 | ,s_store_id s_store_id2,sun_sales sun_sales2 32 | ,mon_sales mon_sales2,tue_sales tue_sales2 33 | ,wed_sales wed_sales2,thu_sales thu_sales2 34 | ,fri_sales fri_sales2,sat_sales sat_sales2 35 | from wss,store,date_dim d 36 | where d.d_week_seq = wss.d_week_seq and 37 | ss_store_sk = s_store_sk and 38 | d_month_seq between 1185+ 12 and 1185 + 23) x 39 | where s_store_id1=s_store_id2 40 | and d_week_seq1=d_week_seq2-52 41 | order by s_store_name1,s_store_id1,d_week_seq1 42 | limit 100 43 | 44 | -- end query 1 in stream 0 using template query59.tpl 45 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q6.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query6.tpl and seed 1819994127 2 | select a.ca_state state, count(*) cnt 3 | from customer_address a 4 | ,customer c 5 | ,store_sales s 6 | ,date_dim d 7 | ,item i 8 | where a.ca_address_sk = c.c_current_addr_sk 9 | and c.c_customer_sk = s.ss_customer_sk 10 | and s.ss_sold_date_sk = d.d_date_sk 11 | and s.ss_item_sk = i.i_item_sk 12 | and d.d_month_seq = 13 | (select distinct (d_month_seq) 14 | from date_dim 15 | where d_year = 2000 16 | and d_moy = 2 ) 17 | and i.i_current_price > 1.2 * 18 | (select 
avg(j.i_current_price) 19 | from item j 20 | where j.i_category = i.i_category) 21 | group by a.ca_state 22 | having count(*) >= 10 23 | order by cnt 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query6.tpl 27 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q60.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query60.tpl and seed 1930872976 2 | with ss as ( 3 | select 4 | i_item_id,sum(ss_ext_sales_price) total_sales 5 | from 6 | store_sales, 7 | date_dim, 8 | customer_address, 9 | item 10 | where 11 | i_item_id in (select 12 | i_item_id 13 | from 14 | item 15 | where i_category in ('Children')) 16 | and ss_item_sk = i_item_sk 17 | and ss_sold_date_sk = d_date_sk 18 | and d_year = 1999 19 | and d_moy = 9 20 | and ss_addr_sk = ca_address_sk 21 | and ca_gmt_offset = -6 22 | group by i_item_id), 23 | cs as ( 24 | select 25 | i_item_id,sum(cs_ext_sales_price) total_sales 26 | from 27 | catalog_sales, 28 | date_dim, 29 | customer_address, 30 | item 31 | where 32 | i_item_id in (select 33 | i_item_id 34 | from 35 | item 36 | where i_category in ('Children')) 37 | and cs_item_sk = i_item_sk 38 | and cs_sold_date_sk = d_date_sk 39 | and d_year = 1999 40 | and d_moy = 9 41 | and cs_bill_addr_sk = ca_address_sk 42 | and ca_gmt_offset = -6 43 | group by i_item_id), 44 | ws as ( 45 | select 46 | i_item_id,sum(ws_ext_sales_price) total_sales 47 | from 48 | web_sales, 49 | date_dim, 50 | customer_address, 51 | item 52 | where 53 | i_item_id in (select 54 | i_item_id 55 | from 56 | item 57 | where i_category in ('Children')) 58 | and ws_item_sk = i_item_sk 59 | and ws_sold_date_sk = d_date_sk 60 | and d_year = 1999 61 | and d_moy = 9 62 | and ws_bill_addr_sk = ca_address_sk 63 | and ca_gmt_offset = -6 64 | group by i_item_id) 65 | select 66 | i_item_id 67 | ,sum(total_sales) total_sales 68 | from 
(select * from ss 69 | union all 70 | select * from cs 71 | union all 72 | select * from ws) tmp1 73 | group by i_item_id 74 | order by i_item_id 75 | ,total_sales 76 | limit 100 77 | 78 | -- end query 1 in stream 0 using template query60.tpl 79 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q61.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query61.tpl and seed 1930872976 2 | select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 3 | from 4 | (select sum(ss_ext_sales_price) promotions 5 | from store_sales 6 | ,store 7 | ,promotion 8 | ,date_dim 9 | ,customer 10 | ,customer_address 11 | ,item 12 | where ss_sold_date_sk = d_date_sk 13 | and ss_store_sk = s_store_sk 14 | and ss_promo_sk = p_promo_sk 15 | and ss_customer_sk= c_customer_sk 16 | and ca_address_sk = c_current_addr_sk 17 | and ss_item_sk = i_item_sk 18 | and ca_gmt_offset = -7 19 | and i_category = 'Electronics' 20 | and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') 21 | and s_gmt_offset = -7 22 | and d_year = 1999 23 | and d_moy = 11) promotional_sales, 24 | (select sum(ss_ext_sales_price) total 25 | from store_sales 26 | ,store 27 | ,date_dim 28 | ,customer 29 | ,customer_address 30 | ,item 31 | where ss_sold_date_sk = d_date_sk 32 | and ss_store_sk = s_store_sk 33 | and ss_customer_sk= c_customer_sk 34 | and ca_address_sk = c_current_addr_sk 35 | and ss_item_sk = i_item_sk 36 | and ca_gmt_offset = -7 37 | and i_category = 'Electronics' 38 | and s_gmt_offset = -7 39 | and d_year = 1999 40 | and d_moy = 11) all_sales 41 | order by promotions, total 42 | limit 100 43 | 44 | -- end query 1 in stream 0 using template query61.tpl 45 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q62.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query62.tpl and seed 1819994127 2 | select 3 | substr(w_warehouse_name,1,20) 4 | ,sm_type 5 | ,web_name 6 | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` 7 | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and 8 | (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` 9 | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and 10 | (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` 11 | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and 12 | (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` 13 | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` 14 | from 15 | web_sales 16 | ,warehouse 17 | ,ship_mode 18 | ,web_site 19 | ,date_dim 20 | where 21 | d_month_seq between 1212 and 1212 + 11 22 | and ws_ship_date_sk = d_date_sk 23 | and ws_warehouse_sk = w_warehouse_sk 24 | and ws_ship_mode_sk = sm_ship_mode_sk 25 | and ws_web_site_sk = web_site_sk 26 | group by 27 | substr(w_warehouse_name,1,20) 28 | ,sm_type 29 | ,web_name 30 | order by substr(w_warehouse_name,1,20) 31 | ,sm_type 32 | ,web_name 33 | limit 100 34 | 35 | -- end query 1 in stream 0 using template query62.tpl 36 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q63.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query63.tpl and seed 1819994127 2 | select * 3 | from (select i_manager_id 4 | ,sum(ss_sales_price) sum_sales 5 | ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales 6 | from item 7 | ,store_sales 8 | ,date_dim 9 | ,store 10 | where ss_item_sk = i_item_sk 11 | and ss_sold_date_sk = d_date_sk 12 | and ss_store_sk 
= s_store_sk 13 | and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) 14 | and (( i_category in ('Books','Children','Electronics') 15 | and i_class in ('personal','portable','refernece','self-help') 16 | and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', 17 | 'exportiunivamalg #9','scholaramalgamalg #9')) 18 | or( i_category in ('Women','Music','Men') 19 | and i_class in ('accessories','classical','fragrances','pants') 20 | and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', 21 | 'importoamalg #1'))) 22 | group by i_manager_id, d_moy) tmp1 23 | where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 24 | order by i_manager_id 25 | ,avg_monthly_sales 26 | ,sum_sales 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query63.tpl 30 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q64.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query64.tpl and seed 1220860970 2 | with cs_ui as 3 | (select cs_item_sk 4 | ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund 5 | from catalog_sales 6 | ,catalog_returns 7 | where cs_item_sk = cr_item_sk 8 | and cs_order_number = cr_order_number 9 | group by cs_item_sk 10 | having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), 11 | cross_sales as 12 | (select i_product_name product_name 13 | ,i_item_sk item_sk 14 | ,s_store_name store_name 15 | ,s_zip store_zip 16 | ,ad1.ca_street_number b_street_number 17 | ,ad1.ca_street_name b_streen_name 18 | ,ad1.ca_city b_city 19 | ,ad1.ca_zip b_zip 20 | ,ad2.ca_street_number c_street_number 21 | ,ad2.ca_street_name c_street_name 22 | ,ad2.ca_city c_city 23 | ,ad2.ca_zip c_zip 24 | ,d1.d_year 
as syear 25 | ,d2.d_year as fsyear 26 | ,d3.d_year s2year 27 | ,count(*) cnt 28 | ,sum(ss_wholesale_cost) s1 29 | ,sum(ss_list_price) s2 30 | ,sum(ss_coupon_amt) s3 31 | FROM store_sales 32 | ,store_returns 33 | ,cs_ui 34 | ,date_dim d1 35 | ,date_dim d2 36 | ,date_dim d3 37 | ,store 38 | ,customer 39 | ,customer_demographics cd1 40 | ,customer_demographics cd2 41 | ,promotion 42 | ,household_demographics hd1 43 | ,household_demographics hd2 44 | ,customer_address ad1 45 | ,customer_address ad2 46 | ,income_band ib1 47 | ,income_band ib2 48 | ,item 49 | WHERE ss_store_sk = s_store_sk AND 50 | ss_sold_date_sk = d1.d_date_sk AND 51 | ss_customer_sk = c_customer_sk AND 52 | ss_cdemo_sk= cd1.cd_demo_sk AND 53 | ss_hdemo_sk = hd1.hd_demo_sk AND 54 | ss_addr_sk = ad1.ca_address_sk and 55 | ss_item_sk = i_item_sk and 56 | ss_item_sk = sr_item_sk and 57 | ss_ticket_number = sr_ticket_number and 58 | ss_item_sk = cs_ui.cs_item_sk and 59 | c_current_cdemo_sk = cd2.cd_demo_sk AND 60 | c_current_hdemo_sk = hd2.hd_demo_sk AND 61 | c_current_addr_sk = ad2.ca_address_sk and 62 | c_first_sales_date_sk = d2.d_date_sk and 63 | c_first_shipto_date_sk = d3.d_date_sk and 64 | ss_promo_sk = p_promo_sk and 65 | hd1.hd_income_band_sk = ib1.ib_income_band_sk and 66 | hd2.hd_income_band_sk = ib2.ib_income_band_sk and 67 | cd1.cd_marital_status <> cd2.cd_marital_status and 68 | i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and 69 | i_current_price between 35 and 35 + 10 and 70 | i_current_price between 35 + 1 and 35 + 15 71 | group by i_product_name 72 | ,i_item_sk 73 | ,s_store_name 74 | ,s_zip 75 | ,ad1.ca_street_number 76 | ,ad1.ca_street_name 77 | ,ad1.ca_city 78 | ,ad1.ca_zip 79 | ,ad2.ca_street_number 80 | ,ad2.ca_street_name 81 | ,ad2.ca_city 82 | ,ad2.ca_zip 83 | ,d1.d_year 84 | ,d2.d_year 85 | ,d3.d_year 86 | ) 87 | select cs1.product_name 88 | ,cs1.store_name 89 | ,cs1.store_zip 90 | ,cs1.b_street_number 91 | ,cs1.b_streen_name 92 | ,cs1.b_city 93 | 
,cs1.b_zip 94 | ,cs1.c_street_number 95 | ,cs1.c_street_name 96 | ,cs1.c_city 97 | ,cs1.c_zip 98 | ,cs1.syear 99 | ,cs1.cnt 100 | ,cs1.s1 101 | ,cs1.s2 102 | ,cs1.s3 103 | ,cs2.s1 104 | ,cs2.s2 105 | ,cs2.s3 106 | ,cs2.syear 107 | ,cs2.cnt 108 | from cross_sales cs1,cross_sales cs2 109 | where cs1.item_sk=cs2.item_sk and 110 | cs1.syear = 2000 and 111 | cs2.syear = 2000 + 1 and 112 | cs2.cnt <= cs1.cnt and 113 | cs1.store_name = cs2.store_name and 114 | cs1.store_zip = cs2.store_zip 115 | order by cs1.product_name 116 | ,cs1.store_name 117 | ,cs2.cnt 118 | 119 | -- end query 1 in stream 0 using template query64.tpl 120 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q65.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query65.tpl and seed 1819994127 2 | select 3 | s_store_name, 4 | i_item_desc, 5 | sc.revenue, 6 | i_current_price, 7 | i_wholesale_cost, 8 | i_brand 9 | from store, item, 10 | (select ss_store_sk, avg(revenue) as ave 11 | from 12 | (select ss_store_sk, ss_item_sk, 13 | sum(ss_sales_price) as revenue 14 | from store_sales, date_dim 15 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 16 | group by ss_store_sk, ss_item_sk) sa 17 | group by ss_store_sk) sb, 18 | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue 19 | from store_sales, date_dim 20 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 21 | group by ss_store_sk, ss_item_sk) sc 22 | where sb.ss_store_sk = sc.ss_store_sk and 23 | sc.revenue <= 0.1 * sb.ave and 24 | s_store_sk = sc.ss_store_sk and 25 | i_item_sk = sc.ss_item_sk 26 | order by s_store_name, i_item_desc 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query65.tpl 30 | -------------------------------------------------------------------------------- 
/flink-tpcds/src/main/resources/queries/q67.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query67.tpl and seed 1819994127 2 | select * 3 | from (select i_category 4 | ,i_class 5 | ,i_brand 6 | ,i_product_name 7 | ,d_year 8 | ,d_qoy 9 | ,d_moy 10 | ,s_store_id 11 | ,sumsales 12 | ,rank() over (partition by i_category order by sumsales desc) rk 13 | from (select i_category 14 | ,i_class 15 | ,i_brand 16 | ,i_product_name 17 | ,d_year 18 | ,d_qoy 19 | ,d_moy 20 | ,s_store_id 21 | ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales 22 | from store_sales 23 | ,date_dim 24 | ,store 25 | ,item 26 | where ss_sold_date_sk=d_date_sk 27 | and ss_item_sk=i_item_sk 28 | and ss_store_sk = s_store_sk 29 | and d_month_seq between 1212 and 1212+11 30 | group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 31 | where rk <= 100 32 | order by i_category 33 | ,i_class 34 | ,i_brand 35 | ,i_product_name 36 | ,d_year 37 | ,d_qoy 38 | ,d_moy 39 | ,s_store_id 40 | ,sumsales 41 | ,rk 42 | limit 100 43 | 44 | -- end query 1 in stream 0 using template query67.tpl 45 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q68.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query68.tpl and seed 803547492 2 | select c_last_name 3 | ,c_first_name 4 | ,ca_city 5 | ,bought_city 6 | ,ss_ticket_number 7 | ,extended_price 8 | ,extended_tax 9 | ,list_price 10 | from (select ss_ticket_number 11 | ,ss_customer_sk 12 | ,ca_city bought_city 13 | ,sum(ss_ext_sales_price) extended_price 14 | ,sum(ss_ext_list_price) list_price 15 | ,sum(ss_ext_tax) extended_tax 16 | from store_sales 17 | ,date_dim 18 | ,store 19 | ,household_demographics 20 | ,customer_address 21 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 22 
| and store_sales.ss_store_sk = store.s_store_sk 23 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 24 | and store_sales.ss_addr_sk = customer_address.ca_address_sk 25 | and date_dim.d_dom between 1 and 2 26 | and (household_demographics.hd_dep_count = 2 or 27 | household_demographics.hd_vehicle_count= 1) 28 | and date_dim.d_year in (1998,1998+1,1998+2) 29 | and store.s_city in ('Cedar Grove','Wildwood') 30 | group by ss_ticket_number 31 | ,ss_customer_sk 32 | ,ss_addr_sk,ca_city) dn 33 | ,customer 34 | ,customer_address current_addr 35 | where ss_customer_sk = c_customer_sk 36 | and customer.c_current_addr_sk = current_addr.ca_address_sk 37 | and current_addr.ca_city <> bought_city 38 | order by c_last_name 39 | ,ss_ticket_number 40 | limit 100 41 | 42 | -- end query 1 in stream 0 using template query68.tpl 43 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q69.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query69.tpl and seed 797269820 2 | select 3 | cd_gender, 4 | cd_marital_status, 5 | cd_education_status, 6 | count(*) cnt1, 7 | cd_purchase_estimate, 8 | count(*) cnt2, 9 | cd_credit_rating, 10 | count(*) cnt3 11 | from 12 | customer c,customer_address ca,customer_demographics 13 | where 14 | c.c_current_addr_sk = ca.ca_address_sk and 15 | ca_state in ('CO','IL','MN') and 16 | cd_demo_sk = c.c_current_cdemo_sk and 17 | exists (select * 18 | from store_sales,date_dim 19 | where c.c_customer_sk = ss_customer_sk and 20 | ss_sold_date_sk = d_date_sk and 21 | d_year = 1999 and 22 | d_moy between 1 and 1+2) and 23 | (not exists (select * 24 | from web_sales,date_dim 25 | where c.c_customer_sk = ws_bill_customer_sk and 26 | ws_sold_date_sk = d_date_sk and 27 | d_year = 1999 and 28 | d_moy between 1 and 1+2) and 29 | not exists (select * 30 | from catalog_sales,date_dim 31 | where 
c.c_customer_sk = cs_ship_customer_sk and 32 | cs_sold_date_sk = d_date_sk and 33 | d_year = 1999 and 34 | d_moy between 1 and 1+2)) 35 | group by cd_gender, 36 | cd_marital_status, 37 | cd_education_status, 38 | cd_purchase_estimate, 39 | cd_credit_rating 40 | order by cd_gender, 41 | cd_marital_status, 42 | cd_education_status, 43 | cd_purchase_estimate, 44 | cd_credit_rating 45 | limit 100 46 | 47 | -- end query 1 in stream 0 using template query69.tpl 48 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q7.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query7.tpl and seed 1930872976 2 | select i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, item, promotion 8 | where ss_sold_date_sk = d_date_sk and 9 | ss_item_sk = i_item_sk and 10 | ss_cdemo_sk = cd_demo_sk and 11 | ss_promo_sk = p_promo_sk and 12 | cd_gender = 'F' and 13 | cd_marital_status = 'W' and 14 | cd_education_status = 'Primary' and 15 | (p_channel_email = 'N' or p_channel_event = 'N') and 16 | d_year = 1998 17 | group by i_item_id 18 | order by i_item_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query7.tpl 22 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q70.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query70.tpl and seed 1819994127 2 | select 3 | sum(ss_net_profit) as total_sum 4 | ,s_state 5 | ,s_county 6 | ,grouping(s_state)+grouping(s_county) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(s_state)+grouping(s_county), 9 | case when grouping(s_county) = 0 then s_state end 10 | order by sum(ss_net_profit) desc) 
as rank_within_parent 11 | from 12 | store_sales 13 | ,date_dim d1 14 | ,store 15 | where 16 | d1.d_month_seq between 1212 and 1212+11 17 | and d1.d_date_sk = ss_sold_date_sk 18 | and s_store_sk = ss_store_sk 19 | and s_state in 20 | ( select s_state 21 | from (select s_state as s_state, 22 | rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking 23 | from store_sales, store, date_dim 24 | where d_month_seq between 1212 and 1212+11 25 | and d_date_sk = ss_sold_date_sk 26 | and s_store_sk = ss_store_sk 27 | group by s_state 28 | ) tmp1 29 | where ranking <= 5 30 | ) 31 | group by rollup(s_state,s_county) 32 | order by 33 | lochierarchy desc 34 | ,case when lochierarchy = 0 then s_state end 35 | ,rank_within_parent 36 | limit 100 37 | 38 | -- end query 1 in stream 0 using template query70.tpl 39 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q71.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query71.tpl and seed 2031708268 2 | select i_brand_id brand_id, i_brand brand,t_hour,t_minute, 3 | sum(ext_price) ext_price 4 | from item, (select ws_ext_sales_price as ext_price, 5 | ws_sold_date_sk as sold_date_sk, 6 | ws_item_sk as sold_item_sk, 7 | ws_sold_time_sk as time_sk 8 | from web_sales,date_dim 9 | where d_date_sk = ws_sold_date_sk 10 | and d_moy=12 11 | and d_year=2001 12 | union all 13 | select cs_ext_sales_price as ext_price, 14 | cs_sold_date_sk as sold_date_sk, 15 | cs_item_sk as sold_item_sk, 16 | cs_sold_time_sk as time_sk 17 | from catalog_sales,date_dim 18 | where d_date_sk = cs_sold_date_sk 19 | and d_moy=12 20 | and d_year=2001 21 | union all 22 | select ss_ext_sales_price as ext_price, 23 | ss_sold_date_sk as sold_date_sk, 24 | ss_item_sk as sold_item_sk, 25 | ss_sold_time_sk as time_sk 26 | from store_sales,date_dim 27 | where d_date_sk = ss_sold_date_sk 28 | and 
d_moy=12 29 | and d_year=2001 30 | ) as tmp,time_dim 31 | where 32 | sold_item_sk = i_item_sk 33 | and i_manager_id=1 34 | and time_sk = t_time_sk 35 | and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') 36 | group by i_brand, i_brand_id,t_hour,t_minute 37 | order by ext_price desc, i_brand_id 38 | 39 | 40 | -- end query 1 in stream 0 using template query71.tpl 41 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q72.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query72.tpl and seed 2031708268 2 | select i_item_desc 3 | ,w_warehouse_name 4 | ,d1.d_week_seq 5 | ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo -- sum, not count: count() of a 1/0 flag never sees NULL and would always equal total_cnt 6 | ,sum(case when p_promo_sk is not null then 1 else 0 end) promo -- same reason: add up the 1/0 flag instead of counting it 7 | ,count(*) total_cnt 8 | from catalog_sales 9 | join inventory on (cs_item_sk = inv_item_sk) 10 | join warehouse on (w_warehouse_sk=inv_warehouse_sk) 11 | join item on (i_item_sk = cs_item_sk) 12 | join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) 13 | join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) 14 | join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) 15 | join date_dim d2 on (inv_date_sk = d2.d_date_sk) 16 | join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) 17 | left outer join promotion on (cs_promo_sk=p_promo_sk) 18 | left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) 19 | where d1.d_week_seq = d2.d_week_seq 20 | and inv_quantity_on_hand < cs_quantity 21 | and d3.d_date > d1.d_date + interval '5' day 22 | and hd_buy_potential = '1001-5000' 23 | and d1.d_year = 2001 24 | and hd_buy_potential = '1001-5000' 25 | and cd_marital_status = 'M' 26 | and d1.d_year = 2001 27 | group by i_item_desc,w_warehouse_name,d1.d_week_seq 28 | order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq 29 | limit 100 30 | 31 | -- end query 1
in stream 0 using template query72.tpl 32 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q73.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query73.tpl and seed 1971067816 2 | select c_last_name 3 | ,c_first_name 4 | ,c_salutation 5 | ,c_preferred_cust_flag 6 | ,ss_ticket_number 7 | ,cnt from 8 | (select ss_ticket_number 9 | ,ss_customer_sk 10 | ,count(*) cnt 11 | from store_sales,date_dim,store,household_demographics 12 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 13 | and store_sales.ss_store_sk = store.s_store_sk 14 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 15 | and date_dim.d_dom between 1 and 2 16 | and (household_demographics.hd_buy_potential = '>10000' or 17 | household_demographics.hd_buy_potential = 'unknown') 18 | and household_demographics.hd_vehicle_count > 0 19 | and case when household_demographics.hd_vehicle_count > 0 then 20 | household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 21 | and date_dim.d_year in (2000,2000+1,2000+2) 22 | and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') 23 | group by ss_ticket_number,ss_customer_sk) dj,customer 24 | where ss_customer_sk = c_customer_sk 25 | and cnt between 1 and 5 26 | order by cnt desc 27 | 28 | -- end query 1 in stream 0 using template query73.tpl 29 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q74.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query74.tpl and seed 1556717815 2 | with year_total as ( 3 | select c_customer_id customer_id 4 | ,c_first_name customer_first_name 5 | ,c_last_name customer_last_name 6 | ,d_year as `year` 7 | ,max(ss_net_paid) year_total 8 
| ,'s' sale_type 9 | from customer 10 | ,store_sales 11 | ,date_dim 12 | where c_customer_sk = ss_customer_sk 13 | and ss_sold_date_sk = d_date_sk 14 | and d_year in (2001,2001+1) 15 | group by c_customer_id 16 | ,c_first_name 17 | ,c_last_name 18 | ,d_year 19 | union all 20 | select c_customer_id customer_id 21 | ,c_first_name customer_first_name 22 | ,c_last_name customer_last_name 23 | ,d_year as `year` 24 | ,max(ws_net_paid) year_total 25 | ,'w' sale_type 26 | from customer 27 | ,web_sales 28 | ,date_dim 29 | where c_customer_sk = ws_bill_customer_sk 30 | and ws_sold_date_sk = d_date_sk 31 | and d_year in (2001,2001+1) 32 | group by c_customer_id 33 | ,c_first_name 34 | ,c_last_name 35 | ,d_year 36 | ) 37 | select 38 | t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name 39 | from year_total t_s_firstyear 40 | ,year_total t_s_secyear 41 | ,year_total t_w_firstyear 42 | ,year_total t_w_secyear 43 | where t_s_secyear.customer_id = t_s_firstyear.customer_id 44 | and t_s_firstyear.customer_id = t_w_secyear.customer_id 45 | and t_s_firstyear.customer_id = t_w_firstyear.customer_id 46 | and t_s_firstyear.sale_type = 's' 47 | and t_w_firstyear.sale_type = 'w' 48 | and t_s_secyear.sale_type = 's' 49 | and t_w_secyear.sale_type = 'w' 50 | and t_s_firstyear.`year` = 2001 51 | and t_s_secyear.`year` = 2001+1 52 | and t_w_firstyear.`year` = 2001 53 | and t_w_secyear.`year` = 2001+1 54 | and t_s_firstyear.year_total > 0 55 | and t_w_firstyear.year_total > 0 56 | and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end 57 | > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end 58 | order by 2,1,3 59 | limit 100 60 | 61 | -- end query 1 in stream 0 using template query74.tpl 62 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q75.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query75.tpl and seed 1819994127 2 | WITH all_sales AS ( 3 | SELECT d_year 4 | ,i_brand_id 5 | ,i_class_id 6 | ,i_category_id 7 | ,i_manufact_id 8 | ,SUM(sales_cnt) AS sales_cnt 9 | ,SUM(sales_amt) AS sales_amt 10 | FROM (SELECT d_year 11 | ,i_brand_id 12 | ,i_class_id 13 | ,i_category_id 14 | ,i_manufact_id 15 | ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt 16 | ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt 17 | FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk 18 | JOIN date_dim ON d_date_sk=cs_sold_date_sk 19 | LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number 20 | AND cs_item_sk=cr_item_sk) 21 | WHERE i_category='Sports' 22 | UNION 23 | SELECT d_year 24 | ,i_brand_id 25 | ,i_class_id 26 | ,i_category_id 27 | ,i_manufact_id 28 | ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt 29 | ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt 30 | FROM store_sales JOIN item ON i_item_sk=ss_item_sk 31 | JOIN date_dim ON d_date_sk=ss_sold_date_sk 32 | LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number 33 | AND ss_item_sk=sr_item_sk) 34 | WHERE i_category='Sports' 35 | UNION 36 | SELECT d_year 37 | ,i_brand_id 38 | ,i_class_id 39 | ,i_category_id 40 | ,i_manufact_id 41 | ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt 42 | ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt 43 | FROM web_sales JOIN item ON i_item_sk=ws_item_sk 44 | JOIN date_dim ON d_date_sk=ws_sold_date_sk 45 | LEFT JOIN web_returns ON (ws_order_number=wr_order_number 46 | AND ws_item_sk=wr_item_sk) 47 | WHERE i_category='Sports') sales_detail 48 | GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) 49 | SELECT prev_yr.d_year AS prev_year 50 | ,curr_yr.d_year AS`year` 51 | ,curr_yr.i_brand_id 52 | ,curr_yr.i_class_id 53 | ,curr_yr.i_category_id 54 | 
,curr_yr.i_manufact_id 55 | ,prev_yr.sales_cnt AS prev_yr_cnt 56 | ,curr_yr.sales_cnt AS curr_yr_cnt 57 | ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff 58 | ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff 59 | FROM all_sales curr_yr, all_sales prev_yr 60 | WHERE curr_yr.i_brand_id=prev_yr.i_brand_id 61 | AND curr_yr.i_class_id=prev_yr.i_class_id 62 | AND curr_yr.i_category_id=prev_yr.i_category_id 63 | AND curr_yr.i_manufact_id=prev_yr.i_manufact_id 64 | AND curr_yr.d_year=2002 65 | AND prev_yr.d_year=2002-1 66 | AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 67 | ORDER BY sales_cnt_diff 68 | limit 100 69 | 70 | -- end query 1 in stream 0 using template query75.tpl 71 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q76.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query76.tpl and seed 2031708268 2 | select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( 3 | SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price 4 | FROM store_sales, item, date_dim 5 | WHERE ss_addr_sk IS NULL 6 | AND ss_sold_date_sk=d_date_sk 7 | AND ss_item_sk=i_item_sk 8 | UNION ALL 9 | SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price 10 | FROM web_sales, item, date_dim 11 | WHERE ws_web_page_sk IS NULL 12 | AND ws_sold_date_sk=d_date_sk 13 | AND ws_item_sk=i_item_sk 14 | UNION ALL 15 | SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price 16 | FROM catalog_sales, item, date_dim 17 | WHERE cs_warehouse_sk IS NULL 18 | AND cs_sold_date_sk=d_date_sk 19 | AND cs_item_sk=i_item_sk) foo 20 | GROUP BY channel, col_name, d_year, d_qoy, i_category 
21 | ORDER BY channel, col_name, d_year, d_qoy, i_category 22 | limit 100 23 | 24 | -- end query 1 in stream 0 using template query76.tpl 25 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q77.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query77.tpl and seed 1819994127 2 | with ss as 3 | (select s_store_sk, 4 | sum(ss_ext_sales_price) as sales, 5 | sum(ss_net_profit) as profit 6 | from store_sales, 7 | date_dim, 8 | store 9 | where ss_sold_date_sk = d_date_sk 10 | and d_date between cast('1998-08-04' as date) 11 | and (cast('1998-08-04' as date) + interval '30' day) 12 | and ss_store_sk = s_store_sk 13 | group by s_store_sk) 14 | , 15 | sr as 16 | (select s_store_sk, 17 | sum(sr_return_amt) as `returns`, 18 | sum(sr_net_loss) as profit_loss 19 | from store_returns, 20 | date_dim, 21 | store 22 | where sr_returned_date_sk = d_date_sk 23 | and d_date between cast('1998-08-04' as date) 24 | and (cast('1998-08-04' as date) + interval '30' day) 25 | and sr_store_sk = s_store_sk 26 | group by s_store_sk), 27 | cs as 28 | (select cs_call_center_sk, 29 | sum(cs_ext_sales_price) as sales, 30 | sum(cs_net_profit) as profit 31 | from catalog_sales, 32 | date_dim 33 | where cs_sold_date_sk = d_date_sk 34 | and d_date between cast('1998-08-04' as date) 35 | and (cast('1998-08-04' as date) + interval '30' day) 36 | group by cs_call_center_sk 37 | ), 38 | cr as 39 | (select 40 | sum(cr_return_amount) as `returns`, 41 | sum(cr_net_loss) as profit_loss 42 | from catalog_returns, 43 | date_dim 44 | where cr_returned_date_sk = d_date_sk 45 | and d_date between cast('1998-08-04' as date) 46 | and (cast('1998-08-04' as date) + interval '30' day) 47 | ), 48 | ws as 49 | ( select wp_web_page_sk, 50 | sum(ws_ext_sales_price) as sales, 51 | sum(ws_net_profit) as profit 52 | from web_sales, 53 | date_dim, 54 | web_page 55 | where 
ws_sold_date_sk = d_date_sk 56 | and d_date between cast('1998-08-04' as date) 57 | and (cast('1998-08-04' as date) + interval '30' day) 58 | and ws_web_page_sk = wp_web_page_sk 59 | group by wp_web_page_sk), 60 | wr as 61 | (select wp_web_page_sk, 62 | sum(wr_return_amt) as `returns`, 63 | sum(wr_net_loss) as profit_loss 64 | from web_returns, 65 | date_dim, 66 | web_page 67 | where wr_returned_date_sk = d_date_sk 68 | and d_date between cast('1998-08-04' as date) 69 | and (cast('1998-08-04' as date) + interval '30' day) 70 | and wr_web_page_sk = wp_web_page_sk 71 | group by wp_web_page_sk) 72 | select channel 73 | , id 74 | , sum(sales) as sales 75 | , sum(`returns`) as `returns` 76 | , sum(profit) as profit 77 | from 78 | (select 'store channel' as channel 79 | , ss.s_store_sk as id 80 | , sales 81 | , coalesce(`returns`, 0) as `returns` 82 | , (profit - coalesce(profit_loss,0)) as profit 83 | from ss left join sr 84 | on ss.s_store_sk = sr.s_store_sk 85 | union all 86 | select 'catalog channel' as channel 87 | , cs_call_center_sk as id 88 | , sales 89 | , `returns` 90 | , (profit - profit_loss) as profit 91 | from cs 92 | , cr 93 | union all 94 | select 'web channel' as channel 95 | , ws.wp_web_page_sk as id 96 | , sales 97 | , coalesce(`returns`, 0) `returns` 98 | , (profit - coalesce(profit_loss,0)) as profit 99 | from ws left join wr 100 | on ws.wp_web_page_sk = wr.wp_web_page_sk 101 | ) x 102 | group by rollup (channel, id) 103 | order by channel 104 | ,id 105 | limit 100 106 | 107 | -- end query 1 in stream 0 using template query77.tpl 108 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q78.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query78.tpl and seed 1819994127 2 | with ws as 3 | (select d_year AS ws_sold_year, ws_item_sk, 4 | ws_bill_customer_sk ws_customer_sk, 5 | sum(ws_quantity) ws_qty, 
6 | sum(ws_wholesale_cost) ws_wc, 7 | sum(ws_sales_price) ws_sp 8 | from web_sales 9 | left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk 10 | join date_dim on ws_sold_date_sk = d_date_sk 11 | where wr_order_number is null 12 | group by d_year, ws_item_sk, ws_bill_customer_sk 13 | ), 14 | cs as 15 | (select d_year AS cs_sold_year, cs_item_sk, 16 | cs_bill_customer_sk cs_customer_sk, 17 | sum(cs_quantity) cs_qty, 18 | sum(cs_wholesale_cost) cs_wc, 19 | sum(cs_sales_price) cs_sp 20 | from catalog_sales 21 | left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk 22 | join date_dim on cs_sold_date_sk = d_date_sk 23 | where cr_order_number is null 24 | group by d_year, cs_item_sk, cs_bill_customer_sk 25 | ), 26 | ss as 27 | (select d_year AS ss_sold_year, ss_item_sk, 28 | ss_customer_sk, 29 | sum(ss_quantity) ss_qty, 30 | sum(ss_wholesale_cost) ss_wc, 31 | sum(ss_sales_price) ss_sp 32 | from store_sales 33 | left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk 34 | join date_dim on ss_sold_date_sk = d_date_sk 35 | where sr_ticket_number is null 36 | group by d_year, ss_item_sk, ss_customer_sk 37 | ) 38 | select 39 | ss_sold_year, ss_item_sk, ss_customer_sk, 40 | round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio, 41 | ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, 42 | coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, 43 | coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, 44 | coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price 45 | from ss 46 | left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) 47 | left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) 48 | where coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000 49 | order by 50 | ss_sold_year, ss_item_sk, ss_customer_sk, 51 | ss_qty desc, ss_wc desc, ss_sp 
desc, 52 | other_chan_qty, 53 | other_chan_wholesale_cost, 54 | other_chan_sales_price, 55 | round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) 56 | limit 100 57 | 58 | -- end query 1 in stream 0 using template query78.tpl 59 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q79.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query79.tpl and seed 2031708268 2 | select 3 | c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit 4 | from 5 | (select ss_ticket_number 6 | ,ss_customer_sk 7 | ,store.s_city 8 | ,sum(ss_coupon_amt) amt 9 | ,sum(ss_net_profit) profit 10 | from store_sales,date_dim,store,household_demographics 11 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 12 | and store_sales.ss_store_sk = store.s_store_sk 13 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 14 | and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) 15 | and date_dim.d_dow = 1 16 | and date_dim.d_year in (1998,1998+1,1998+2) 17 | and store.s_number_employees between 200 and 295 18 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer 19 | where ss_customer_sk = c_customer_sk 20 | order by c_last_name,c_first_name,substr(s_city,1,30), profit 21 | limit 100 22 | 23 | -- end query 1 in stream 0 using template query79.tpl 24 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q80.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query80.tpl and seed 1819994127 2 | with ssr as 3 | (select s_store_id as store_id, 4 | sum(ss_ext_sales_price) as sales, 5 | sum(coalesce(sr_return_amt, 0)) as `returns`, 6 | sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit 7 | from store_sales left 
outer join store_returns on 8 | (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), 9 | date_dim, 10 | store, 11 | item, 12 | promotion 13 | where ss_sold_date_sk = d_date_sk 14 | and d_date between cast('1998-08-04' as date) 15 | and (cast('1998-08-04' as date) + interval '30' day) 16 | and ss_store_sk = s_store_sk 17 | and ss_item_sk = i_item_sk 18 | and i_current_price > 50 19 | and ss_promo_sk = p_promo_sk 20 | and p_channel_tv = 'N' 21 | group by s_store_id) 22 | , 23 | csr as 24 | (select cp_catalog_page_id as catalog_page_id, 25 | sum(cs_ext_sales_price) as sales, 26 | sum(coalesce(cr_return_amount, 0)) as `returns`, 27 | sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit 28 | from catalog_sales left outer join catalog_returns on 29 | (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), 30 | date_dim, 31 | catalog_page, 32 | item, 33 | promotion 34 | where cs_sold_date_sk = d_date_sk 35 | and d_date between cast('1998-08-04' as date) 36 | and (cast('1998-08-04' as date) + interval '30' day) 37 | and cs_catalog_page_sk = cp_catalog_page_sk 38 | and cs_item_sk = i_item_sk 39 | and i_current_price > 50 40 | and cs_promo_sk = p_promo_sk 41 | and p_channel_tv = 'N' 42 | group by cp_catalog_page_id) 43 | , 44 | wsr as 45 | (select web_site_id, 46 | sum(ws_ext_sales_price) as sales, 47 | sum(coalesce(wr_return_amt, 0)) as `returns`, 48 | sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit 49 | from web_sales left outer join web_returns on 50 | (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), 51 | date_dim, 52 | web_site, 53 | item, 54 | promotion 55 | where ws_sold_date_sk = d_date_sk 56 | and d_date between cast('1998-08-04' as date) 57 | and (cast('1998-08-04' as date) + interval '30' day) 58 | and ws_web_site_sk = web_site_sk 59 | and ws_item_sk = i_item_sk 60 | and i_current_price > 50 61 | and ws_promo_sk = p_promo_sk 62 | and p_channel_tv = 'N' 63 | group by web_site_id) 64 | select channel 65 | , id 66 | , 
sum(sales) as sales 67 | , sum(`returns`) as `returns` 68 | , sum(profit) as profit 69 | from 70 | (select 'store channel' as channel 71 | , 'store' || store_id as id 72 | , sales 73 | , `returns` 74 | , profit 75 | from ssr 76 | union all 77 | select 'catalog channel' as channel 78 | , 'catalog_page' || catalog_page_id as id 79 | , sales 80 | , `returns` 81 | , profit 82 | from csr 83 | union all 84 | select 'web channel' as channel 85 | , 'web_site' || web_site_id as id 86 | , sales 87 | , `returns` 88 | , profit 89 | from wsr 90 | ) x 91 | group by rollup (channel, id) 92 | order by channel 93 | ,id 94 | limit 100 95 | 96 | -- end query 1 in stream 0 using template query80.tpl 97 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q81.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query81.tpl and seed 1819994127 2 | with customer_total_return as 3 | (select cr_returning_customer_sk as ctr_customer_sk 4 | ,ca_state as ctr_state, 5 | sum(cr_return_amt_inc_tax) as ctr_total_return 6 | from catalog_returns 7 | ,date_dim 8 | ,customer_address 9 | where cr_returned_date_sk = d_date_sk 10 | and d_year =1998 11 | and cr_returning_addr_sk = ca_address_sk 12 | group by cr_returning_customer_sk 13 | ,ca_state ) 14 | select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name 15 | ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset 16 | ,ca_location_type,ctr_total_return 17 | from customer_total_return ctr1 18 | ,customer_address 19 | ,customer 20 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 21 | from customer_total_return ctr2 22 | where ctr1.ctr_state = ctr2.ctr_state) 23 | and ca_address_sk = c_current_addr_sk 24 | and ca_state = 'IL' 25 | and ctr1.ctr_customer_sk = c_customer_sk 26 | order by 
c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name 27 | ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset 28 | ,ca_location_type,ctr_total_return 29 | limit 100 30 | 31 | -- end query 1 in stream 0 using template query81.tpl 32 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q82.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query82.tpl and seed 55585014 2 | select i_item_id 3 | ,i_item_desc 4 | ,i_current_price 5 | from item, inventory, date_dim, store_sales 6 | where i_current_price between 30 and 30+30 7 | and inv_item_sk = i_item_sk 8 | and d_date_sk=inv_date_sk 9 | and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + interval '60' day) 10 | and i_manufact_id in (437,129,727,663) 11 | and inv_quantity_on_hand between 100 and 500 12 | and ss_item_sk = i_item_sk 13 | group by i_item_id,i_item_desc,i_current_price 14 | order by i_item_id 15 | limit 100 16 | 17 | -- end query 1 in stream 0 using template query82.tpl 18 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q83.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query83.tpl and seed 1930872976 2 | with sr_items as 3 | (select i_item_id item_id, 4 | sum(sr_return_quantity) sr_item_qty 5 | from store_returns, 6 | item, 7 | date_dim 8 | where sr_item_sk = i_item_sk 9 | and d_date in 10 | (select d_date 11 | from date_dim 12 | where d_week_seq in 13 | (select d_week_seq 14 | from date_dim 15 | where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) 16 | and sr_returned_date_sk = d_date_sk 17 | group by i_item_id), 18 | cr_items as 19 | (select i_item_id item_id, 20 | sum(cr_return_quantity) 
cr_item_qty 21 | from catalog_returns, 22 | item, 23 | date_dim 24 | where cr_item_sk = i_item_sk 25 | and d_date in 26 | (select d_date 27 | from date_dim 28 | where d_week_seq in 29 | (select d_week_seq 30 | from date_dim 31 | where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) 32 | and cr_returned_date_sk = d_date_sk 33 | group by i_item_id), 34 | wr_items as 35 | (select i_item_id item_id, 36 | sum(wr_return_quantity) wr_item_qty 37 | from web_returns, 38 | item, 39 | date_dim 40 | where wr_item_sk = i_item_sk 41 | and d_date in 42 | (select d_date 43 | from date_dim 44 | where d_week_seq in 45 | (select d_week_seq 46 | from date_dim 47 | where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) 48 | and wr_returned_date_sk = d_date_sk 49 | group by i_item_id) 50 | select sr_items.item_id 51 | ,sr_item_qty 52 | ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev 53 | ,cr_item_qty 54 | ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev 55 | ,wr_item_qty 56 | ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev 57 | ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average 58 | from sr_items 59 | ,cr_items 60 | ,wr_items 61 | where sr_items.item_id=cr_items.item_id 62 | and sr_items.item_id=wr_items.item_id 63 | order by sr_items.item_id 64 | ,sr_item_qty 65 | limit 100 66 | 67 | -- end query 1 in stream 0 using template query83.tpl 68 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q84.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query84.tpl and seed 1819994127 2 | select c_customer_id as customer_id 3 | ,c_last_name || ', ' || c_first_name as customername 4 | from customer 5 | ,customer_address 6 | ,customer_demographics 7 | ,household_demographics 8 | ,income_band 9 | ,store_returns 10 | where ca_city = 'Hopewell' 11 | and c_current_addr_sk = 
ca_address_sk 12 | and ib_lower_bound >= 32287 13 | and ib_upper_bound <= 32287 + 50000 14 | and ib_income_band_sk = hd_income_band_sk 15 | and cd_demo_sk = c_current_cdemo_sk 16 | and hd_demo_sk = c_current_hdemo_sk 17 | and sr_cdemo_sk = cd_demo_sk 18 | order by c_customer_id 19 | limit 100 20 | 21 | -- end query 1 in stream 0 using template query84.tpl 22 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q85.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query85.tpl and seed 622697896 2 | select substr(r_reason_desc,1,20) 3 | ,avg(ws_quantity) 4 | ,avg(wr_refunded_cash) 5 | ,avg(wr_fee) 6 | from web_sales, web_returns, web_page, customer_demographics cd1, 7 | customer_demographics cd2, customer_address, date_dim, reason 8 | where ws_web_page_sk = wp_web_page_sk 9 | and ws_item_sk = wr_item_sk 10 | and ws_order_number = wr_order_number 11 | and ws_sold_date_sk = d_date_sk and d_year = 1998 12 | and cd1.cd_demo_sk = wr_refunded_cdemo_sk 13 | and cd2.cd_demo_sk = wr_returning_cdemo_sk 14 | and ca_address_sk = wr_refunded_addr_sk 15 | and r_reason_sk = wr_reason_sk 16 | and 17 | ( 18 | ( 19 | cd1.cd_marital_status = 'M' 20 | and 21 | cd1.cd_marital_status = cd2.cd_marital_status 22 | and 23 | cd1.cd_education_status = '4 yr Degree' 24 | and 25 | cd1.cd_education_status = cd2.cd_education_status 26 | and 27 | ws_sales_price between 100.00 and 150.00 28 | ) 29 | or 30 | ( 31 | cd1.cd_marital_status = 'D' 32 | and 33 | cd1.cd_marital_status = cd2.cd_marital_status 34 | and 35 | cd1.cd_education_status = 'Primary' 36 | and 37 | cd1.cd_education_status = cd2.cd_education_status 38 | and 39 | ws_sales_price between 50.00 and 100.00 40 | ) 41 | or 42 | ( 43 | cd1.cd_marital_status = 'U' 44 | and 45 | cd1.cd_marital_status = cd2.cd_marital_status 46 | and 47 | cd1.cd_education_status = 'Advanced Degree' 48 | and 49 | 
cd1.cd_education_status = cd2.cd_education_status 50 | and 51 | ws_sales_price between 150.00 and 200.00 52 | ) 53 | ) 54 | and 55 | ( 56 | ( 57 | ca_country = 'United States' 58 | and 59 | ca_state in ('KY', 'GA', 'NM') 60 | and ws_net_profit between 100 and 200 61 | ) 62 | or 63 | ( 64 | ca_country = 'United States' 65 | and 66 | ca_state in ('MT', 'OR', 'IN') 67 | and ws_net_profit between 150 and 300 68 | ) 69 | or 70 | ( 71 | ca_country = 'United States' 72 | and 73 | ca_state in ('WI', 'MO', 'WV') 74 | and ws_net_profit between 50 and 250 75 | ) 76 | ) 77 | group by r_reason_desc 78 | order by substr(r_reason_desc,1,20) 79 | ,avg(ws_quantity) 80 | ,avg(wr_refunded_cash) 81 | ,avg(wr_fee) 82 | limit 100 83 | 84 | -- end query 1 in stream 0 using template query85.tpl 85 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q86.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query86.tpl and seed 1819994127 2 | select 3 | sum(ws_net_paid) as total_sum 4 | ,i_category 5 | ,i_class 6 | ,grouping(i_category)+grouping(i_class) as lochierarchy 7 | ,rank() over ( 8 | partition by grouping(i_category)+grouping(i_class), 9 | case when grouping(i_class) = 0 then i_category end 10 | order by sum(ws_net_paid) desc) as rank_within_parent 11 | from 12 | web_sales 13 | ,date_dim d1 14 | ,item 15 | where 16 | d1.d_month_seq between 1212 and 1212+11 17 | and d1.d_date_sk = ws_sold_date_sk 18 | and i_item_sk = ws_item_sk 19 | group by rollup(i_category,i_class) 20 | order by 21 | lochierarchy desc, 22 | case when lochierarchy = 0 then i_category end, 23 | rank_within_parent 24 | limit 100 25 | 26 | -- end query 1 in stream 0 using template query86.tpl 27 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q87.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query87.tpl and seed 1819994127 2 | select count(*) 3 | from ((select distinct c_last_name, c_first_name, d_date 4 | from store_sales, date_dim, customer 5 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 6 | and store_sales.ss_customer_sk = customer.c_customer_sk 7 | and d_month_seq between 1212 and 1212+11) 8 | except 9 | (select distinct c_last_name, c_first_name, d_date 10 | from catalog_sales, date_dim, customer 11 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 12 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 13 | and d_month_seq between 1212 and 1212+11) 14 | except 15 | (select distinct c_last_name, c_first_name, d_date 16 | from web_sales, date_dim, customer 17 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 18 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 19 | and d_month_seq between 1212 and 1212+11) 20 | ) cool_cust 21 | 22 | 23 | -- end query 1 in stream 0 using template query87.tpl 24 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q89.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query89.tpl and seed 1719819282 2 | select * 3 | from( 4 | select i_category, i_class, i_brand, 5 | s_store_name, s_company_name, 6 | d_moy, 7 | sum(ss_sales_price) sum_sales, 8 | avg(sum(ss_sales_price)) over 9 | (partition by i_category, i_brand, s_store_name, s_company_name) 10 | avg_monthly_sales 11 | from item, store_sales, date_dim, store 12 | where ss_item_sk = i_item_sk and 13 | ss_sold_date_sk = d_date_sk and 14 | ss_store_sk = s_store_sk and 15 | d_year in (2000) and 16 | ((i_category in ('Home','Books','Electronics') and 17 | i_class in ('wallpaper','parenting','musical') 18 | ) 19 | or (i_category in 
('Shoes','Jewelry','Men') and 20 | i_class in ('womens','birdal','pants') 21 | )) 22 | group by i_category, i_class, i_brand, 23 | s_store_name, s_company_name, d_moy) tmp1 24 | where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 25 | order by sum_sales - avg_monthly_sales, s_store_name 26 | limit 100 27 | 28 | -- end query 1 in stream 0 using template query89.tpl 29 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q9.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query9.tpl and seed 1490436826 2 | select case when (select count(*) 3 | from store_sales 4 | where ss_quantity between 1 and 20) > 409437 5 | then (select avg(ss_ext_list_price) 6 | from store_sales 7 | where ss_quantity between 1 and 20) 8 | else (select avg(ss_net_paid_inc_tax) 9 | from store_sales 10 | where ss_quantity between 1 and 20) end bucket1 , 11 | case when (select count(*) 12 | from store_sales 13 | where ss_quantity between 21 and 40) > 4595804 14 | then (select avg(ss_ext_list_price) 15 | from store_sales 16 | where ss_quantity between 21 and 40) 17 | else (select avg(ss_net_paid_inc_tax) 18 | from store_sales 19 | where ss_quantity between 21 and 40) end bucket2, 20 | case when (select count(*) 21 | from store_sales 22 | where ss_quantity between 41 and 60) > 7887297 23 | then (select avg(ss_ext_list_price) 24 | from store_sales 25 | where ss_quantity between 41 and 60) 26 | else (select avg(ss_net_paid_inc_tax) 27 | from store_sales 28 | where ss_quantity between 41 and 60) end bucket3, 29 | case when (select count(*) 30 | from store_sales 31 | where ss_quantity between 61 and 80) > 10872978 32 | then (select avg(ss_ext_list_price) 33 | from store_sales 34 | where ss_quantity between 61 and 80) 35 | else (select avg(ss_net_paid_inc_tax) 36 | from store_sales 
37 | where ss_quantity between 61 and 80) end bucket4, 38 | case when (select count(*) 39 | from store_sales 40 | where ss_quantity between 81 and 100) > 43571537 41 | then (select avg(ss_ext_list_price) 42 | from store_sales 43 | where ss_quantity between 81 and 100) 44 | else (select avg(ss_net_paid_inc_tax) 45 | from store_sales 46 | where ss_quantity between 81 and 100) end bucket5 47 | from reason 48 | where r_reason_sk = 1 49 | 50 | 51 | -- end query 1 in stream 0 using template query9.tpl 52 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q90.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query90.tpl and seed 2031708268 2 | select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio 3 | from ( select count(*) amc 4 | from web_sales, household_demographics , time_dim, web_page 5 | where ws_sold_time_sk = time_dim.t_time_sk 6 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 7 | and ws_web_page_sk = web_page.wp_web_page_sk 8 | and time_dim.t_hour between 6 and 6+1 9 | and household_demographics.hd_dep_count = 8 10 | and web_page.wp_char_count between 5000 and 5200) `at`, 11 | ( select count(*) pmc 12 | from web_sales, household_demographics , time_dim, web_page 13 | where ws_sold_time_sk = time_dim.t_time_sk 14 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 15 | and ws_web_page_sk = web_page.wp_web_page_sk 16 | and time_dim.t_hour between 14 and 14+1 17 | and household_demographics.hd_dep_count = 8 18 | and web_page.wp_char_count between 5000 and 5200) pt 19 | order by am_pm_ratio 20 | limit 100 21 | 22 | -- end query 1 in stream 0 using template query90.tpl 23 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q91.sql: 
-------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query91.tpl and seed 1930872976 2 | select 3 | cc_call_center_id Call_Center, 4 | cc_name Call_Center_Name, 5 | cc_manager Manager, 6 | sum(cr_net_loss) Returns_Loss 7 | from 8 | call_center, 9 | catalog_returns, 10 | date_dim, 11 | customer, 12 | customer_address, 13 | customer_demographics, 14 | household_demographics 15 | where 16 | cr_call_center_sk = cc_call_center_sk 17 | and cr_returned_date_sk = d_date_sk 18 | and cr_returning_customer_sk= c_customer_sk 19 | and cd_demo_sk = c_current_cdemo_sk 20 | and hd_demo_sk = c_current_hdemo_sk 21 | and ca_address_sk = c_current_addr_sk 22 | and d_year = 1999 23 | and d_moy = 11 24 | and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') 25 | or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) 26 | and hd_buy_potential like '0-500%' 27 | and ca_gmt_offset = -7 28 | group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status 29 | order by sum(cr_net_loss) desc 30 | 31 | -- end query 1 in stream 0 using template query91.tpl 32 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q92.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query92.tpl and seed 2031708268 2 | select 3 | sum(ws_ext_discount_amt) as `Excess Discount Amount` 4 | from 5 | web_sales 6 | ,item 7 | ,date_dim 8 | where 9 | i_manufact_id = 269 10 | and i_item_sk = ws_item_sk 11 | and d_date between '1998-03-18' and 12 | (cast('1998-03-18' as date) + interval '90' day) 13 | and d_date_sk = ws_sold_date_sk 14 | and ws_ext_discount_amt 15 | > ( 16 | SELECT 17 | 1.3 * avg(ws_ext_discount_amt) 18 | FROM 19 | web_sales 20 | ,date_dim 21 | WHERE 22 | ws_item_sk = i_item_sk 23 | and d_date between '1998-03-18' and 24 | 
(cast('1998-03-18' as date) + interval '90' day) 25 | and d_date_sk = ws_sold_date_sk 26 | ) 27 | order by sum(ws_ext_discount_amt) 28 | limit 100 29 | 30 | -- end query 1 in stream 0 using template query92.tpl 31 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q93.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query93.tpl and seed 1200409435 2 | select ss_customer_sk 3 | ,sum(act_sales) sumsales 4 | from (select ss_item_sk 5 | ,ss_ticket_number 6 | ,ss_customer_sk 7 | ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price 8 | else (ss_quantity*ss_sales_price) end act_sales 9 | from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk 10 | and sr_ticket_number = ss_ticket_number) 11 | ,reason 12 | where sr_reason_sk = r_reason_sk 13 | and r_reason_desc = 'Did not like the warranty') t 14 | group by ss_customer_sk 15 | order by sumsales, ss_customer_sk 16 | limit 100 17 | 18 | -- end query 1 in stream 0 using template query93.tpl 19 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q94.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 2 | select 3 | count(distinct ws_order_number) as `order count` 4 | ,sum(ws_ext_ship_cost) as `total shipping cost` 5 | ,sum(ws_net_profit) as `total net profit` 6 | from 7 | web_sales ws1 8 | ,date_dim 9 | ,customer_address 10 | ,web_site 11 | where 12 | d_date between '1999-5-01' and 13 | (cast('1999-5-01' as date) + interval '60' day) 14 | and ws1.ws_ship_date_sk = d_date_sk 15 | and ws1.ws_ship_addr_sk = ca_address_sk 16 | and ca_state = 'TX' 17 | and ws1.ws_web_site_sk = web_site_sk 18 | and web_company_name = 'pri' 19 | and 
exists (select * 20 | from web_sales ws2 21 | where ws1.ws_order_number = ws2.ws_order_number 22 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 23 | and not exists(select * 24 | from web_returns wr1 25 | where ws1.ws_order_number = wr1.wr_order_number) 26 | order by count(distinct ws_order_number) 27 | limit 100 28 | 29 | -- end query 1 in stream 0 using template query94.tpl 30 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q95.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query95.tpl and seed 2031708268 2 | with ws_wh as 3 | (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 4 | from web_sales ws1,web_sales ws2 5 | where ws1.ws_order_number = ws2.ws_order_number 6 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 7 | select 8 | count(distinct ws_order_number) as `order count` 9 | ,sum(ws_ext_ship_cost) as `total shipping cost` 10 | ,sum(ws_net_profit) as `total net profit` 11 | from 12 | web_sales ws1 13 | ,date_dim 14 | ,customer_address 15 | ,web_site 16 | where 17 | d_date between '1999-5-01' and 18 | (cast('1999-5-01' as date) + interval '60' day) 19 | and ws1.ws_ship_date_sk = d_date_sk 20 | and ws1.ws_ship_addr_sk = ca_address_sk 21 | and ca_state = 'TX' 22 | and ws1.ws_web_site_sk = web_site_sk 23 | and web_company_name = 'pri' 24 | and ws1.ws_order_number in (select ws_order_number 25 | from ws_wh) 26 | and ws1.ws_order_number in (select wr_order_number 27 | from web_returns,ws_wh 28 | where wr_order_number = ws_wh.ws_order_number) 29 | order by count(distinct ws_order_number) 30 | limit 100 31 | 32 | -- end query 1 in stream 0 using template query95.tpl 33 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q96.sql: -------------------------------------------------------------------------------- 1 
| -- start query 1 in stream 0 using template query96.tpl and seed 1819994127 2 | select count(*) 3 | from store_sales 4 | ,household_demographics 5 | ,time_dim, store 6 | where ss_sold_time_sk = time_dim.t_time_sk 7 | and ss_hdemo_sk = household_demographics.hd_demo_sk 8 | and ss_store_sk = s_store_sk 9 | and time_dim.t_hour = 8 10 | and time_dim.t_minute >= 30 11 | and household_demographics.hd_dep_count = 5 12 | and store.s_store_name = 'ese' 13 | order by count(*) 14 | limit 100 15 | 16 | -- end query 1 in stream 0 using template query96.tpl 17 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q97.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query97.tpl and seed 1819994127 2 | with ssci as ( 3 | select ss_customer_sk customer_sk 4 | ,ss_item_sk item_sk 5 | from store_sales,date_dim 6 | where ss_sold_date_sk = d_date_sk 7 | and d_month_seq between 1212 and 1212 + 11 8 | group by ss_customer_sk 9 | ,ss_item_sk), 10 | csci as( 11 | select cs_bill_customer_sk customer_sk 12 | ,cs_item_sk item_sk 13 | from catalog_sales,date_dim 14 | where cs_sold_date_sk = d_date_sk 15 | and d_month_seq between 1212 and 1212 + 11 16 | group by cs_bill_customer_sk 17 | ,cs_item_sk) 18 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 19 | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 20 | ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 21 | from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk 22 | and ssci.item_sk = csci.item_sk) 23 | limit 100 24 | 25 | -- end query 1 in stream 0 using template query97.tpl 26 | -------------------------------------------------------------------------------- 
/flink-tpcds/src/main/resources/queries/q98.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query98.tpl and seed 345591136 2 | select i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(ss_ext_sales_price) as itemrevenue 7 | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from 10 | store_sales 11 | ,item 12 | ,date_dim 13 | where 14 | ss_item_sk = i_item_sk 15 | and i_category in ('Jewelry', 'Sports', 'Books') 16 | and ss_sold_date_sk = d_date_sk 17 | and d_date between cast('2001-01-12' as date) 18 | and (cast('2001-01-12' as date) + interval '30' day) 19 | group by 20 | i_item_id 21 | ,i_item_desc 22 | ,i_category 23 | ,i_class 24 | ,i_current_price 25 | order by 26 | i_category 27 | ,i_class 28 | ,i_item_id 29 | ,i_item_desc 30 | ,revenueratio 31 | 32 | -- end query 1 in stream 0 using template query98.tpl 33 | -------------------------------------------------------------------------------- /flink-tpcds/src/main/resources/queries/q99.sql: -------------------------------------------------------------------------------- 1 | -- start query 1 in stream 0 using template query99.tpl and seed 1819994127 2 | select 3 | substr(w_warehouse_name,1,20) 4 | ,sm_type 5 | ,cc_name 6 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` 7 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and 8 | (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` 9 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and 10 | (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` 11 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and 12 | (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` 13 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` 14 | from 15 
| catalog_sales 16 | ,warehouse 17 | ,ship_mode 18 | ,call_center 19 | ,date_dim 20 | where 21 | d_month_seq between 1212 and 1212 + 11 22 | and cs_ship_date_sk = d_date_sk 23 | and cs_warehouse_sk = w_warehouse_sk 24 | and cs_ship_mode_sk = sm_ship_mode_sk 25 | and cs_call_center_sk = cc_call_center_sk 26 | group by 27 | substr(w_warehouse_name,1,20) 28 | ,sm_type 29 | ,cc_name 30 | order by substr(w_warehouse_name,1,20) 31 | ,sm_type 32 | ,cc_name 33 | limit 100 34 | 35 | -- end query 1 in stream 0 using template query99.tpl 36 | -------------------------------------------------------------------------------- /flink-tpcds/src/test/java/com/ververica/flink/benchmark/QueryUtilTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.ververica.flink.benchmark; 19 | 20 | import org.junit.ClassRule; 21 | import org.junit.Test; 22 | import org.junit.rules.TemporaryFolder; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.nio.charset.StandardCharsets; 27 | import java.nio.file.Files; 28 | import java.util.LinkedHashMap; 29 | import java.util.Map; 30 | 31 | import static com.ververica.flink.benchmark.QueryUtil.getQueries; 32 | import static org.junit.Assert.assertEquals; 33 | import static org.junit.Assert.assertNotNull; 34 | import static org.junit.Assert.assertTrue; 35 | 36 | /** 37 | * Test for {@link QueryUtil}. 38 | */ 39 | public class QueryUtilTest { 40 | 41 | @ClassRule 42 | public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); 43 | 44 | @Test 45 | public void testInternalAll() { 46 | LinkedHashMap queries = getQueries(null, null); 47 | assertQueries(queries, 102); 48 | } 49 | 50 | @Test 51 | public void testInternalSelect() { 52 | LinkedHashMap queries = getQueries(null, "q1.sql,q14b.sql,q95.sql"); 53 | assertQueries(queries, 3); 54 | } 55 | 56 | @Test 57 | public void testAll() throws IOException { 58 | String dir = prepareOutFile(); 59 | LinkedHashMap queries = getQueries(dir, null); 60 | assertQueries(queries, 102); 61 | } 62 | 63 | @Test 64 | public void testSelect() throws IOException { 65 | String dir = prepareOutFile(); 66 | LinkedHashMap queries = getQueries(dir, "q1.sql,q14b.sql,q95.sql"); 67 | assertQueries(queries, 3); 68 | } 69 | 70 | private String prepareOutFile() throws IOException { 71 | LinkedHashMap queries = getQueries(null, null); 72 | File dir = TEMPORARY_FOLDER.newFolder(); 73 | for (Map.Entry e : queries.entrySet()) { 74 | Files.write( 75 | new File(dir, e.getKey()).toPath(), 76 | e.getValue().getBytes(StandardCharsets.UTF_8)); 77 | } 78 | return dir.getAbsolutePath(); 79 | } 80 | 81 | private void assertQueries(LinkedHashMap queries, int size) { 82 | assertEquals(size, 
queries.size()); 83 | queries.forEach((name, sql) -> { 84 | assertNotNull(name); 85 | assertTrue(name.length() > 0); 86 | 87 | assertNotNull(sql); 88 | assertTrue(sql.length() > 0); 89 | }); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /hive-tpcds-setup/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: target/lib/dsdgen.jar target/hive-tpcds-setup-0.1-SNAPSHOT.jar 3 | # The find pattern is quoted so the shell hands it to find instead of expanding it against the current directory. 4 | target/hive-tpcds-setup-0.1-SNAPSHOT.jar: $(shell find . -name '*.java') 5 | mvn package 6 | 7 | target/tpcds_kit.zip: tpcds_kit.zip 8 | mkdir -p target/ 9 | cp tpcds_kit.zip target/tpcds_kit.zip 10 | # Recipes run under /bin/sh, so the existence check uses POSIX '[' rather than the bash-only '[['. 11 | tpcds_kit.zip: 12 | curl https://public-repo-1.hortonworks.com/hive-testbench/tpcds/README 13 | [ -e tpcds_kit.zip ] || curl --output tpcds_kit.zip https://public-repo-1.hortonworks.com/hive-testbench/tpcds/TPCDS_Tools.zip 14 | 15 | target/lib/dsdgen.jar: target/tools/dsdgen 16 | cd target/; mkdir -p lib/; ( jar cvf lib/dsdgen.jar tools/ || gjar cvf lib/dsdgen.jar tools/ ) 17 | 18 | target/tools/dsdgen: target/tpcds_kit.zip 19 | test -d target/tools/ || (cd target; unzip tpcds_kit.zip) 20 | test -d target/tools/ || (cd target; mv */tools tools) 21 | cd target/tools; cat ../../patches/all/*.patch | patch -p0 22 | cd target/tools; cat ../../patches/${MYOS}/*.patch | patch -p1 23 | cd target/tools; make clean; make dsdgen 24 | 25 | clean: 26 | mvn clean -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/analyze.sql: -------------------------------------------------------------------------------- 1 | analyze table call_center compute statistics for columns; 2 | analyze table catalog_page compute statistics for columns; 3 | analyze table catalog_returns compute statistics for columns; 4 | analyze table catalog_sales compute statistics for columns; 5 | analyze table customer compute statistics for columns; 6 | analyze table 
customer_address compute statistics for columns; 7 | analyze table customer_demographics compute statistics for columns; 8 | analyze table date_dim compute statistics for columns; 9 | analyze table household_demographics compute statistics for columns; 10 | analyze table income_band compute statistics for columns; 11 | analyze table inventory compute statistics for columns; 12 | analyze table item compute statistics for columns; 13 | analyze table promotion compute statistics for columns; 14 | analyze table reason compute statistics for columns; 15 | analyze table ship_mode compute statistics for columns; 16 | analyze table store compute statistics for columns; 17 | analyze table store_returns compute statistics for columns; 18 | analyze table store_sales compute statistics for columns; 19 | analyze table time_dim compute statistics for columns; 20 | analyze table warehouse compute statistics for columns; 21 | analyze table web_page compute statistics for columns; 22 | analyze table web_returns compute statistics for columns; 23 | analyze table web_sales compute statistics for columns; 24 | analyze table web_site compute statistics for columns; -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/call_center.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists call_center; 5 | 6 | create table call_center 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.call_center; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/catalog_page.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_page; 5 | 6 | create table catalog_page 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.catalog_page; 9 | 
-------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/catalog_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_returns; 5 | 6 | create table catalog_returns 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.catalog_returns; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/catalog_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists catalog_sales; 5 | 6 | create table catalog_sales 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.catalog_sales; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/customer.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer; 5 | 6 | create table customer 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/customer_address.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists customer_address; 5 | 6 | create table customer_address 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer_address 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/customer_demographics.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists 
customer_demographics; 5 | 6 | create table customer_demographics 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.customer_demographics; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/date_dim.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists date_dim; 5 | 6 | create table date_dim 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.date_dim; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/household_demographics.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists household_demographics; 5 | 6 | create table household_demographics 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.household_demographics; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/income_band.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists income_band; 5 | 6 | create table income_band 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.income_band; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/inventory.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists inventory; 5 | 6 | create table inventory 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.inventory 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/item.sql: 
-------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists item; 5 | 6 | create table item 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.item 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/promotion.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists promotion; 5 | 6 | create table promotion 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.promotion; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/reason.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists reason; 5 | 6 | create table reason 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.reason; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/ship_mode.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists ship_mode; 5 | 6 | create table ship_mode 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.ship_mode; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/store.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store; 5 | 6 | create table store 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.store 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/store_returns.sql: 
-------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store_returns; 5 | 6 | create table store_returns 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.store_returns 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/store_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists store_sales; 5 | 6 | create table store_sales 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.store_sales 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/time_dim.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists time_dim; 5 | 6 | create table time_dim 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.time_dim; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/warehouse.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists warehouse; 5 | 6 | create table warehouse 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.warehouse; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/web_page.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_page; 5 | 6 | create table web_page 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_page; 9 | -------------------------------------------------------------------------------- 
/hive-tpcds-setup/ddl-tpcds/bin/web_returns.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_returns; 5 | 6 | create table web_returns 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_returns 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/web_sales.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_sales; 5 | 6 | create table web_sales 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_sales 9 | ; 10 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin/web_site.sql: -------------------------------------------------------------------------------- 1 | create database if not exists ${DB}; 2 | use ${DB}; 3 | 4 | drop table if exists web_site; 5 | 6 | create table web_site 7 | stored as ${FILE} 8 | as select * from ${SOURCE}.web_site; 9 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/call_center.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into call_center select * from ${SOURCE}.call_center; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/catalog_page.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into catalog_page select * from ${SOURCE}.catalog_page; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/catalog_returns.sql: -------------------------------------------------------------------------------- 1 | set 
hive.exec.max.dynamic.partitions=100000; 2 | set hive.exec.max.created.files=1000000; 3 | set hive.exec.max.dynamic.partitions.pernode=100000; 4 | set hive.optimize.sort.dynamic.partition=true; 5 | set mapreduce.job.reduce.slowstart.completedmaps=1; 6 | use ${DB}; 7 | from ${SOURCE}.catalog_returns cr 8 | insert overwrite table catalog_returns partition (cr_returned_date_sk) 9 | select 10 | cr.cr_returned_time_sk, 11 | cr.cr_item_sk, 12 | cr.cr_refunded_customer_sk, 13 | cr.cr_refunded_cdemo_sk, 14 | cr.cr_refunded_hdemo_sk, 15 | cr.cr_refunded_addr_sk, 16 | cr.cr_returning_customer_sk, 17 | cr.cr_returning_cdemo_sk, 18 | cr.cr_returning_hdemo_sk, 19 | cr.cr_returning_addr_sk, 20 | cr.cr_call_center_sk, 21 | cr.cr_catalog_page_sk, 22 | cr.cr_ship_mode_sk, 23 | cr.cr_warehouse_sk, 24 | cr.cr_reason_sk, 25 | cr.cr_order_number, 26 | cr.cr_return_quantity, 27 | cr.cr_return_amount, 28 | cr.cr_return_tax, 29 | cr.cr_return_amt_inc_tax, 30 | cr.cr_fee, 31 | cr.cr_return_ship_cost, 32 | cr.cr_refunded_cash, 33 | cr.cr_reversed_charge, 34 | cr.cr_store_credit, 35 | cr.cr_net_loss, 36 | cr.cr_returned_date_sk 37 | where cr.cr_returned_date_sk is not null 38 | insert overwrite table catalog_returns partition (cr_returned_date_sk) 39 | select 40 | cr.cr_returned_time_sk, 41 | cr.cr_item_sk, 42 | cr.cr_refunded_customer_sk, 43 | cr.cr_refunded_cdemo_sk, 44 | cr.cr_refunded_hdemo_sk, 45 | cr.cr_refunded_addr_sk, 46 | cr.cr_returning_customer_sk, 47 | cr.cr_returning_cdemo_sk, 48 | cr.cr_returning_hdemo_sk, 49 | cr.cr_returning_addr_sk, 50 | cr.cr_call_center_sk, 51 | cr.cr_catalog_page_sk, 52 | cr.cr_ship_mode_sk, 53 | cr.cr_warehouse_sk, 54 | cr.cr_reason_sk, 55 | cr.cr_order_number, 56 | cr.cr_return_quantity, 57 | cr.cr_return_amount, 58 | cr.cr_return_tax, 59 | cr.cr_return_amt_inc_tax, 60 | cr.cr_fee, 61 | cr.cr_return_ship_cost, 62 | cr.cr_refunded_cash, 63 | cr.cr_reversed_charge, 64 | cr.cr_store_credit, 65 | cr.cr_net_loss, 66 | cr.cr_returned_date_sk 67 | where 
cr.cr_returned_date_sk is null 68 | sort by cr.cr_returned_date_sk 69 | ; -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/catalog_sales.sql: -------------------------------------------------------------------------------- 1 | set hive.exec.max.dynamic.partitions=100000; 2 | set hive.exec.max.created.files=1000000; 3 | set hive.exec.max.dynamic.partitions.pernode=100000; 4 | set hive.optimize.sort.dynamic.partition=true; 5 | set mapreduce.job.reduce.slowstart.completedmaps=1; 6 | use ${DB}; 7 | from ${SOURCE}.catalog_sales cs 8 | insert overwrite table catalog_sales partition (cs_sold_date_sk) 9 | select 10 | cs.cs_sold_time_sk, 11 | cs.cs_ship_date_sk, 12 | cs.cs_bill_customer_sk, 13 | cs.cs_bill_cdemo_sk, 14 | cs.cs_bill_hdemo_sk, 15 | cs.cs_bill_addr_sk, 16 | cs.cs_ship_customer_sk, 17 | cs.cs_ship_cdemo_sk, 18 | cs.cs_ship_hdemo_sk, 19 | cs.cs_ship_addr_sk, 20 | cs.cs_call_center_sk, 21 | cs.cs_catalog_page_sk, 22 | cs.cs_ship_mode_sk, 23 | cs.cs_warehouse_sk, 24 | cs.cs_item_sk, 25 | cs.cs_promo_sk, 26 | cs.cs_order_number, 27 | cs.cs_quantity, 28 | cs.cs_wholesale_cost, 29 | cs.cs_list_price, 30 | cs.cs_sales_price, 31 | cs.cs_ext_discount_amt, 32 | cs.cs_ext_sales_price, 33 | cs.cs_ext_wholesale_cost, 34 | cs.cs_ext_list_price, 35 | cs.cs_ext_tax, 36 | cs.cs_coupon_amt, 37 | cs.cs_ext_ship_cost, 38 | cs.cs_net_paid, 39 | cs.cs_net_paid_inc_tax, 40 | cs.cs_net_paid_inc_ship, 41 | cs.cs_net_paid_inc_ship_tax, 42 | cs.cs_net_profit, 43 | cs.cs_sold_date_sk 44 | where cs.cs_sold_date_sk is not null 45 | insert overwrite table catalog_sales partition (cs_sold_date_sk) 46 | select 47 | cs.cs_sold_time_sk, 48 | cs.cs_ship_date_sk, 49 | cs.cs_bill_customer_sk, 50 | cs.cs_bill_cdemo_sk, 51 | cs.cs_bill_hdemo_sk, 52 | cs.cs_bill_addr_sk, 53 | cs.cs_ship_customer_sk, 54 | cs.cs_ship_cdemo_sk, 55 | cs.cs_ship_hdemo_sk, 56 | cs.cs_ship_addr_sk, 57 | cs.cs_call_center_sk, 58 | cs.cs_catalog_page_sk, 
59 | cs.cs_ship_mode_sk, 60 | cs.cs_warehouse_sk, 61 | cs.cs_item_sk, 62 | cs.cs_promo_sk, 63 | cs.cs_order_number, 64 | cs.cs_quantity, 65 | cs.cs_wholesale_cost, 66 | cs.cs_list_price, 67 | cs.cs_sales_price, 68 | cs.cs_ext_discount_amt, 69 | cs.cs_ext_sales_price, 70 | cs.cs_ext_wholesale_cost, 71 | cs.cs_ext_list_price, 72 | cs.cs_ext_tax, 73 | cs.cs_coupon_amt, 74 | cs.cs_ext_ship_cost, 75 | cs.cs_net_paid, 76 | cs.cs_net_paid_inc_tax, 77 | cs.cs_net_paid_inc_ship, 78 | cs.cs_net_paid_inc_ship_tax, 79 | cs.cs_net_profit, 80 | cs.cs_sold_date_sk 81 | where cs.cs_sold_date_sk is null 82 | sort by cs.cs_sold_date_sk 83 | ; -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/customer.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into customer select * from ${SOURCE}.customer; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/customer_address.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into customer_address select * from ${SOURCE}.customer_address; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/customer_demographics.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into customer_demographics select * from ${SOURCE}.customer_demographics; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/date_dim.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into date_dim select * from ${SOURCE}.date_dim; 3 | -------------------------------------------------------------------------------- 
/hive-tpcds-setup/ddl-tpcds/bin_partitioned/household_demographics.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into household_demographics select * from ${SOURCE}.household_demographics; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/income_band.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into income_band select * from ${SOURCE}.income_band; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/inventory.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into inventory select * from ${SOURCE}.inventory 3 | ; 4 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/item.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into item select * from ${SOURCE}.item 3 | ; 4 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/promotion.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into promotion select * from ${SOURCE}.promotion; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/reason.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into reason select * from ${SOURCE}.reason; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/ship_mode.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | 
insert into ship_mode select * from ${SOURCE}.ship_mode; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/store.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into store select * from ${SOURCE}.store 3 | ; 4 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/store_returns.sql: -------------------------------------------------------------------------------- 1 | set hive.exec.max.dynamic.partitions=100000; 2 | set hive.exec.max.created.files=1000000; 3 | set hive.exec.max.dynamic.partitions.pernode=100000; 4 | set hive.optimize.sort.dynamic.partition=true; 5 | set mapreduce.job.reduce.slowstart.completedmaps=1; 6 | use ${DB}; 7 | from ${SOURCE}.store_returns sr 8 | insert overwrite table store_returns partition (sr_returned_date_sk) 9 | select 10 | sr.sr_return_time_sk, 11 | sr.sr_item_sk, 12 | sr.sr_customer_sk, 13 | sr.sr_cdemo_sk, 14 | sr.sr_hdemo_sk, 15 | sr.sr_addr_sk, 16 | sr.sr_store_sk, 17 | sr.sr_reason_sk, 18 | sr.sr_ticket_number, 19 | sr.sr_return_quantity, 20 | sr.sr_return_amt, 21 | sr.sr_return_tax, 22 | sr.sr_return_amt_inc_tax, 23 | sr.sr_fee, 24 | sr.sr_return_ship_cost, 25 | sr.sr_refunded_cash, 26 | sr.sr_reversed_charge, 27 | sr.sr_store_credit, 28 | sr.sr_net_loss, 29 | sr.sr_returned_date_sk 30 | where sr.sr_returned_date_sk is not null 31 | insert overwrite table store_returns partition (sr_returned_date_sk) 32 | select 33 | sr.sr_return_time_sk, 34 | sr.sr_item_sk, 35 | sr.sr_customer_sk, 36 | sr.sr_cdemo_sk, 37 | sr.sr_hdemo_sk, 38 | sr.sr_addr_sk, 39 | sr.sr_store_sk, 40 | sr.sr_reason_sk, 41 | sr.sr_ticket_number, 42 | sr.sr_return_quantity, 43 | sr.sr_return_amt, 44 | sr.sr_return_tax, 45 | sr.sr_return_amt_inc_tax, 46 | sr.sr_fee, 47 | sr.sr_return_ship_cost, 48 | sr.sr_refunded_cash, 49 | sr.sr_reversed_charge, 50 | 
sr.sr_store_credit, 51 | sr.sr_net_loss, 52 | sr.sr_returned_date_sk 53 | where sr.sr_returned_date_sk is null 54 | sort by sr.sr_returned_date_sk; -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/store_sales.sql: -------------------------------------------------------------------------------- 1 | set hive.exec.max.dynamic.partitions=100000; 2 | set hive.exec.max.created.files=1000000; 3 | set hive.exec.max.dynamic.partitions.pernode=100000; 4 | set hive.optimize.sort.dynamic.partition=true; 5 | set mapreduce.job.reduce.slowstart.completedmaps=1; 6 | use ${DB}; 7 | from ${SOURCE}.store_sales ss 8 | insert overwrite table store_sales partition (ss_sold_date_sk) 9 | select 10 | ss.ss_sold_time_sk, 11 | ss.ss_item_sk, 12 | ss.ss_customer_sk, 13 | ss.ss_cdemo_sk, 14 | ss.ss_hdemo_sk, 15 | ss.ss_addr_sk, 16 | ss.ss_store_sk, 17 | ss.ss_promo_sk, 18 | ss.ss_ticket_number, 19 | ss.ss_quantity, 20 | ss.ss_wholesale_cost, 21 | ss.ss_list_price, 22 | ss.ss_sales_price, 23 | ss.ss_ext_discount_amt, 24 | ss.ss_ext_sales_price, 25 | ss.ss_ext_wholesale_cost, 26 | ss.ss_ext_list_price, 27 | ss.ss_ext_tax, 28 | ss.ss_coupon_amt, 29 | ss.ss_net_paid, 30 | ss.ss_net_paid_inc_tax, 31 | ss.ss_net_profit, 32 | ss.ss_sold_date_sk 33 | where ss.ss_sold_date_sk is not null 34 | insert overwrite table store_sales partition (ss_sold_date_sk) 35 | select 36 | ss.ss_sold_time_sk, 37 | ss.ss_item_sk, 38 | ss.ss_customer_sk, 39 | ss.ss_cdemo_sk, 40 | ss.ss_hdemo_sk, 41 | ss.ss_addr_sk, 42 | ss.ss_store_sk, 43 | ss.ss_promo_sk, 44 | ss.ss_ticket_number, 45 | ss.ss_quantity, 46 | ss.ss_wholesale_cost, 47 | ss.ss_list_price, 48 | ss.ss_sales_price, 49 | ss.ss_ext_discount_amt, 50 | ss.ss_ext_sales_price, 51 | ss.ss_ext_wholesale_cost, 52 | ss.ss_ext_list_price, 53 | ss.ss_ext_tax, 54 | ss.ss_coupon_amt, 55 | ss.ss_net_paid, 56 | ss.ss_net_paid_inc_tax, 57 | ss.ss_net_profit, 58 | ss.ss_sold_date_sk 59 | where 
ss.ss_sold_date_sk is null 60 | sort by ss.ss_sold_date_sk 61 | ; -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/time_dim.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into time_dim select * from ${SOURCE}.time_dim; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/warehouse.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into warehouse select * from ${SOURCE}.warehouse; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/web_page.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into web_page select * from ${SOURCE}.web_page; 3 | -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/web_returns.sql: -------------------------------------------------------------------------------- 1 | set hive.exec.max.dynamic.partitions=100000; 2 | set hive.exec.max.created.files=1000000; 3 | set hive.exec.max.dynamic.partitions.pernode=100000; 4 | set hive.optimize.sort.dynamic.partition=true; 5 | set mapreduce.job.reduce.slowstart.completedmaps=1; 6 | use ${DB}; 7 | from ${SOURCE}.web_returns wr 8 | insert overwrite table web_returns partition (wr_returned_date_sk) 9 | select 10 | wr.wr_returned_time_sk, 11 | wr.wr_item_sk, 12 | wr.wr_refunded_customer_sk, 13 | wr.wr_refunded_cdemo_sk, 14 | wr.wr_refunded_hdemo_sk, 15 | wr.wr_refunded_addr_sk, 16 | wr.wr_returning_customer_sk, 17 | wr.wr_returning_cdemo_sk, 18 | wr.wr_returning_hdemo_sk, 19 | wr.wr_returning_addr_sk, 20 | wr.wr_web_page_sk, 21 | wr.wr_reason_sk, 22 | wr.wr_order_number, 23 | wr.wr_return_quantity, 24 | wr.wr_return_amt, 25 | 
wr.wr_return_tax, 26 | wr.wr_return_amt_inc_tax, 27 | wr.wr_fee, 28 | wr.wr_return_ship_cost, 29 | wr.wr_refunded_cash, 30 | wr.wr_reversed_charge, 31 | wr.wr_account_credit, 32 | wr.wr_net_loss, 33 | wr.wr_returned_date_sk 34 | where wr.wr_returned_date_sk is not null 35 | insert overwrite table web_returns partition (wr_returned_date_sk) 36 | select 37 | wr.wr_returned_time_sk, 38 | wr.wr_item_sk, 39 | wr.wr_refunded_customer_sk, 40 | wr.wr_refunded_cdemo_sk, 41 | wr.wr_refunded_hdemo_sk, 42 | wr.wr_refunded_addr_sk, 43 | wr.wr_returning_customer_sk, 44 | wr.wr_returning_cdemo_sk, 45 | wr.wr_returning_hdemo_sk, 46 | wr.wr_returning_addr_sk, 47 | wr.wr_web_page_sk, 48 | wr.wr_reason_sk, 49 | wr.wr_order_number, 50 | wr.wr_return_quantity, 51 | wr.wr_return_amt, 52 | wr.wr_return_tax, 53 | wr.wr_return_amt_inc_tax, 54 | wr.wr_fee, 55 | wr.wr_return_ship_cost, 56 | wr.wr_refunded_cash, 57 | wr.wr_reversed_charge, 58 | wr.wr_account_credit, 59 | wr.wr_net_loss, 60 | wr.wr_returned_date_sk 61 | where wr.wr_returned_date_sk is null 62 | sort by wr.wr_returned_date_sk 63 | ; -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/web_sales.sql: -------------------------------------------------------------------------------- 1 | set hive.exec.max.dynamic.partitions=100000; 2 | set hive.exec.max.created.files=1000000; 3 | set hive.exec.max.dynamic.partitions.pernode=100000; 4 | set hive.optimize.sort.dynamic.partition=true; 5 | set mapreduce.job.reduce.slowstart.completedmaps=1; 6 | use ${DB}; 7 | from ${SOURCE}.web_sales ws 8 | insert overwrite table web_sales partition (ws_sold_date_sk) 9 | select 10 | ws.ws_sold_time_sk, 11 | ws.ws_ship_date_sk, 12 | ws.ws_item_sk, 13 | ws.ws_bill_customer_sk, 14 | ws.ws_bill_cdemo_sk, 15 | ws.ws_bill_hdemo_sk, 16 | ws.ws_bill_addr_sk, 17 | ws.ws_ship_customer_sk, 18 | ws.ws_ship_cdemo_sk, 19 | ws.ws_ship_hdemo_sk, 20 | ws.ws_ship_addr_sk, 21 | ws.ws_web_page_sk, 22 | 
ws.ws_web_site_sk, 23 | ws.ws_ship_mode_sk, 24 | ws.ws_warehouse_sk, 25 | ws.ws_promo_sk, 26 | ws.ws_order_number, 27 | ws.ws_quantity, 28 | ws.ws_wholesale_cost, 29 | ws.ws_list_price, 30 | ws.ws_sales_price, 31 | ws.ws_ext_discount_amt, 32 | ws.ws_ext_sales_price, 33 | ws.ws_ext_wholesale_cost, 34 | ws.ws_ext_list_price, 35 | ws.ws_ext_tax, 36 | ws.ws_coupon_amt, 37 | ws.ws_ext_ship_cost, 38 | ws.ws_net_paid, 39 | ws.ws_net_paid_inc_tax, 40 | ws.ws_net_paid_inc_ship, 41 | ws.ws_net_paid_inc_ship_tax, 42 | ws.ws_net_profit, 43 | ws.ws_sold_date_sk 44 | where ws.ws_sold_date_sk is not null 45 | insert overwrite table web_sales partition (ws_sold_date_sk) 46 | select 47 | ws.ws_sold_time_sk, 48 | ws.ws_ship_date_sk, 49 | ws.ws_item_sk, 50 | ws.ws_bill_customer_sk, 51 | ws.ws_bill_cdemo_sk, 52 | ws.ws_bill_hdemo_sk, 53 | ws.ws_bill_addr_sk, 54 | ws.ws_ship_customer_sk, 55 | ws.ws_ship_cdemo_sk, 56 | ws.ws_ship_hdemo_sk, 57 | ws.ws_ship_addr_sk, 58 | ws.ws_web_page_sk, 59 | ws.ws_web_site_sk, 60 | ws.ws_ship_mode_sk, 61 | ws.ws_warehouse_sk, 62 | ws.ws_promo_sk, 63 | ws.ws_order_number, 64 | ws.ws_quantity, 65 | ws.ws_wholesale_cost, 66 | ws.ws_list_price, 67 | ws.ws_sales_price, 68 | ws.ws_ext_discount_amt, 69 | ws.ws_ext_sales_price, 70 | ws.ws_ext_wholesale_cost, 71 | ws.ws_ext_list_price, 72 | ws.ws_ext_tax, 73 | ws.ws_coupon_amt, 74 | ws.ws_ext_ship_cost, 75 | ws.ws_net_paid, 76 | ws.ws_net_paid_inc_tax, 77 | ws.ws_net_paid_inc_ship, 78 | ws.ws_net_paid_inc_ship_tax, 79 | ws.ws_net_profit, 80 | ws.ws_sold_date_sk 81 | where ws.ws_sold_date_sk is null 82 | sort by ws.ws_sold_date_sk 83 | ; -------------------------------------------------------------------------------- /hive-tpcds-setup/ddl-tpcds/bin_partitioned/web_site.sql: -------------------------------------------------------------------------------- 1 | use ${DB}; 2 | insert into web_site select * from ${SOURCE}.web_site; 3 | -------------------------------------------------------------------------------- 
/hive-tpcds-setup/patches/Darwin/macosx.2.patch: -------------------------------------------------------------------------------- 1 | diff -rupN tools/date.c toolsnew/date.c 2 | --- tools/date.c 2013-04-18 04:50:28.000000000 +0800 3 | +++ toolsnew/date.c 2020-01-13 17:24:59.000000000 +0800 4 | @@ -37,7 +37,7 @@ 5 | /*** includes ***/ 6 | #include 7 | #ifndef USE_STDLIB_H 8 | -#include 9 | +#include 10 | #endif 11 | #include 12 | #include 13 | diff -rupN tools/dist.c toolsnew/dist.c 14 | --- tools/dist.c 2013-04-18 04:50:24.000000000 +0800 15 | +++ toolsnew/dist.c 2020-01-13 17:25:30.000000000 +0800 16 | @@ -38,7 +38,7 @@ 17 | #include 18 | #include 19 | #include 20 | -#include 21 | +#include 22 | #ifdef WIN32 23 | #include 24 | #include 25 | @@ -52,7 +52,7 @@ 26 | #include 27 | #endif 28 | #ifndef USE_STDLIB_H 29 | -#include 30 | +#include 31 | #endif 32 | #include "config.h" 33 | #include "decimal.h" 34 | diff -rupN tools/genrand.c toolsnew/genrand.c 35 | --- tools/genrand.c 2013-04-18 04:50:24.000000000 +0800 36 | +++ toolsnew/genrand.c 2020-01-13 17:26:22.000000000 +0800 37 | @@ -54,6 +54,8 @@ 38 | #include "tables.h" 39 | #include "streams.h" 40 | 41 | +#define MAXINT INT_MAX 42 | + 43 | static long Mult = 16807; /* the multiplier */ 44 | static long nQ = 127773; /* the quotient MAXINT / Mult */ 45 | static long nR = 2836; /* the remainder MAXINT % Mult */ 46 | diff -rupN tools/misc.c toolsnew/misc.c 47 | --- tools/misc.c 2013-04-18 04:50:26.000000000 +0800 48 | +++ toolsnew/misc.c 2020-01-13 17:26:58.000000000 +0800 49 | @@ -42,7 +42,7 @@ 50 | #include 51 | #include 52 | #ifndef _POSIX_SOURCE 53 | -#include 54 | +#include 55 | #endif /* POSIX_SOURCE */ 56 | #include 57 | #ifdef AIX 58 | diff -rupN tools/nulls.c toolsnew/nulls.c 59 | --- tools/nulls.c 2013-04-18 04:50:26.000000000 +0800 60 | +++ toolsnew/nulls.c 2020-01-13 17:27:23.000000000 +0800 61 | @@ -39,6 +39,8 @@ 62 | #include "genrand.h" 63 | #include "tdefs.h" 64 | 65 | +#define MAXINT INT_MAX 66 | + 
67 | /* 68 | * Routine: nullCheck(int nColumn) 69 | * Purpose: 70 | diff -rupN tools/porting.h toolsnew/porting.h 71 | --- tools/porting.h 2013-04-18 04:50:28.000000000 +0800 72 | +++ toolsnew/porting.h 2020-01-13 17:24:02.000000000 +0800 73 | @@ -43,7 +43,7 @@ 74 | #endif 75 | 76 | #ifdef USE_VALUES_H 77 | -#include 78 | +#include 79 | #endif 80 | 81 | #ifdef USE_LIMITS_H 82 | -------------------------------------------------------------------------------- /hive-tpcds-setup/patches/all/tpcds-buffered.patch: -------------------------------------------------------------------------------- 1 | diff --git print.c print.c 2 | index 1b64362..5108bd7 100644 3 | --- print.c 4 | +++ print.c 5 | @@ -68,6 +68,7 @@ print_close(int tbl) 6 | fpOutfile = NULL; 7 | if (pTdef->outfile) 8 | { 9 | + fflush(pTdef->outfile); 10 | fclose(pTdef->outfile); 11 | pTdef->outfile = NULL; 12 | } 13 | @@ -536,7 +538,7 @@ print_end (int tbl) 14 | if (add_term) 15 | fwrite(term, 1, add_term, fpOutfile); 16 | fprintf (fpOutfile, "\n"); 17 | - fflush(fpOutfile); 18 | + //fflush(fpOutfile); 19 | 20 | return (res); 21 | } 22 | -------------------------------------------------------------------------------- /hive-tpcds-setup/patches/all/tpcds-strcpy.patch: -------------------------------------------------------------------------------- 1 | diff --git r_params.c r_params.c 2 | index 4db16e5..9b1a8e6 100644 3 | --- r_params.c 4 | +++ r_params.c 5 | @@ -46,7 +46,7 @@ 6 | #include "tdefs.h" 7 | #include "release.h" 8 | 9 | -#define PARAM_MAX_LEN 80 10 | +#define PARAM_MAX_LEN PATH_MAX 11 | 12 | #ifndef TEST 13 | extern option_t options[]; 14 | @@ -275,7 +275,7 @@ set_str(char *var, char *val) 15 | nParam = fnd_param(var); 16 | if (nParam >= 0) 17 | { 18 | - strcpy(params[options[nParam].index], val); 19 | + strncpy(params[options[nParam].index], val, PARAM_MAX_LEN); 20 | options[nParam].flags |= OPT_SET; 21 | } 22 | 23 | -------------------------------------------------------------------------------- 
/hive-tpcds-setup/patches/all/tpcds_misspelled_header_guard.patch: -------------------------------------------------------------------------------- 1 | --- w_store_sales.h.orig 2014-06-25 10:58:19.000000000 -0700 2 | +++ w_store_sales.h 2014-06-25 10:58:51.000000000 -0700 3 | @@ -34,7 +34,7 @@ 4 | * Gradient Systems 5 | */ 6 | #ifndef W_STORE_SALES_H 7 | -#define W_STORE_SLAES_H 8 | +#define W_STORE_SALES_H 9 | 10 | #include "constants.h" 11 | #include "pricing.h" 12 | -------------------------------------------------------------------------------- /hive-tpcds-setup/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 23 | 24 | 4.0.0 25 | 26 | 27 | flink-sql-benchmark 28 | com.ververica 29 | 0.1-SNAPSHOT 30 | .. 31 | 32 | 33 | com.ververica 34 | hive-tpcds-setup 35 | 0.1-SNAPSHOT 36 | jar 37 | 38 | 39 | 2.7.5 40 | 1.3.1 41 | 42 | 43 | 44 | 45 | org.apache.hadoop 46 | hadoop-client 47 | ${hadoop.version} 48 | 49 | 50 | 51 | commons-cli 52 | commons-cli 53 | ${commons.version} 54 | 55 | 56 | 57 | 58 | 59 | 60 | maven-compiler-plugin 61 | 62 | 1.8 63 | 1.8 64 | 65 | 66 | 67 | 68 | org.apache.maven.plugins 69 | maven-jar-plugin 70 | 71 | 72 | 73 | 74 | 75 | com.ververica.benchmark.GenTable 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /hive-tpcds-setup/tpcds-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Check for all the stuff we need to function. 4 | for f in gcc javac mvn; do 5 | which $f > /dev/null 2>&1 6 | if [ $? -ne 0 ]; then 7 | echo "Required program $f is missing. Please install or fix your path and try again." 
8 | exit 1 9 | fi 10 | done 11 | 12 | echo "Building TPC-DS Data Generator" 13 | # Use the POSIX [ ] test: this script runs under /bin/sh, which need not support [[ ]]. 14 | unamestr=`uname` 15 | if [ "$unamestr" = 'Darwin' ]; then 16 | export MYOS="Darwin" 17 | fi 18 | 19 | make 20 | echo "TPC-DS Data Generator built, you can now use tpcds-setup.sh to generate data." -------------------------------------------------------------------------------- /hive-tpcds-setup/tpcds-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage { 4 | echo "Usage: tpcds-setup.sh scale_factor [temp_directory]" 5 | exit 1 6 | } 7 | 8 | function runcommand { 9 | if [ "X$DEBUG_SCRIPT" != "X" ]; then 10 | $1 11 | else 12 | $1 2>/dev/null 13 | fi 14 | } 15 | 16 | if [ ! -f target/hive-tpcds-setup-0.1-SNAPSHOT.jar ]; then 17 | echo "Please build the data generator with ./tpcds-build.sh first" 18 | exit 1 19 | fi 20 | 21 | if [ "X$HIVE_BIN" = "X" ]; then 22 | HIVE_BIN=`which hive` 23 | if [ $? -ne 0 ]; then 24 | echo "Script must be run where hive in PATH or HIVE_BIN env variable is set" 25 | exit 1 26 | fi 27 | fi 28 | 29 | # Tables in the TPC-DS schema. 30 | DIMS="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site" 31 | FACTS="store_sales store_returns web_sales web_returns catalog_sales catalog_returns inventory" 32 | 33 | # Get the parameters. 34 | SCALE=$1 35 | DIR=$2 36 | if [ "X$BUCKET_DATA" != "X" ]; then 37 | BUCKETS=13 38 | RETURN_BUCKETS=13 39 | else 40 | BUCKETS=1 41 | RETURN_BUCKETS=1 42 | fi 43 | if [ "X$DEBUG_SCRIPT" != "X" ]; then 44 | set -x 45 | fi 46 | 47 | # Sanity checking. 48 | if [ X"$SCALE" = "X" ]; then 49 | usage 50 | fi 51 | if [ X"$DIR" = "X" ]; then 52 | DIR=/tmp/tpcds-generate 53 | fi 54 | if [ $SCALE -eq 1 ]; then 55 | echo "Scale factor must be greater than 1" 56 | exit 1 57 | fi 58 | 59 | # Do the actual data generation. 
60 | hdfs dfs -mkdir -p ${DIR} 61 | hdfs dfs -ls ${DIR}/${SCALE} > /dev/null 62 | if [ $? -ne 0 ]; then 63 | echo "Generating data at scale factor $SCALE." 64 | hadoop jar target/*.jar -d ${DIR}/${SCALE}/ -s ${SCALE} 65 | fi 66 | hdfs dfs -ls ${DIR}/${SCALE} > /dev/null 67 | if [ $? -ne 0 ]; then 68 | echo "Data generation failed, exiting." 69 | exit 1 70 | fi 71 | 72 | hadoop fs -chmod -R 777 ${DIR}/${SCALE} 73 | 74 | echo "TPC-DS text data generation complete." 75 | 76 | # Create the text/flat tables as external tables. 77 | echo "Loading text data into external tables." 78 | TXT_DATABASE=tpcds_text_${SCALE} 79 | runcommand "$HIVE_BIN -f ddl-tpcds/text/alltables.sql --hivevar DB=${TXT_DATABASE} --hivevar LOCATION=${DIR}/${SCALE}" 80 | 81 | # Create tables for the specified table format. 82 | if [ "X$FORMAT" = "X" ]; then 83 | FORMAT=orc 84 | fi 85 | 86 | LOAD_FILE="load_${FORMAT}_${SCALE}.mk" 87 | SILENCE="2> /dev/null 1> /dev/null" 88 | if [ "X$DEBUG_SCRIPT" != "X" ]; then 89 | SILENCE="" 90 | fi 91 | 92 | echo -e "all: ${DIMS} ${FACTS}" > $LOAD_FILE 93 | 94 | i=1 95 | total=24 96 | DATABASE=tpcds_bin_${FORMAT}_${SCALE} 97 | MAX_REDUCERS=2500 # maximum number of useful reducers for any scale 98 | REDUCERS=$((test ${SCALE} -gt ${MAX_REDUCERS} && echo ${MAX_REDUCERS}) || echo ${SCALE}) 99 | 100 | for t in ${DIMS} 101 | do 102 | COMMAND="$HIVE_BIN -f ddl-tpcds/bin/${t}.sql \ 103 | --hivevar DB=${DATABASE} --hivevar SOURCE=${TXT_DATABASE} \ 104 | --hivevar SCALE=${SCALE} \ 105 | --hivevar REDUCERS=${REDUCERS} \ 106 | --hivevar FILE=${FORMAT}" 107 | echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE 108 | i=`expr $i + 1` 109 | done 110 | for t in ${FACTS} 111 | do 112 | COMMAND="$HIVE_BIN -f ddl-tpcds/bin/${t}.sql \ 113 | --hivevar DB=${DATABASE} \ 114 | --hivevar SCALE=${SCALE} \ 115 | --hivevar SOURCE=${TXT_DATABASE} --hivevar BUCKETS=${BUCKETS} \ 116 | --hivevar RETURN_BUCKETS=${RETURN_BUCKETS} --hivevar 
REDUCERS=${REDUCERS} --hivevar FILE=${FORMAT}" 117 | echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE 118 | i=`expr $i + 1` 119 | done 120 | make -j 1 -f $LOAD_FILE 121 | echo "Loading constraints" 122 | runcommand "$HIVE_BIN -f ddl-tpcds/bin/add_constraints.sql --hivevar DB=${DATABASE}" 123 | echo "Data loaded into database ${DATABASE}." -------------------------------------------------------------------------------- /hive-tpcds-setup/tpcds-setup_partitioned.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage { 4 | echo "Usage: tpcds-setup_partitioned.sh scale_factor" 5 | exit 1 6 | } 7 | 8 | function runcommand { 9 | if [ "X$DEBUG_SCRIPT" != "X" ]; then 10 | $1 11 | else 12 | $1 2>/dev/null 13 | fi 14 | } 15 | 16 | if [ "X$HIVE_BIN" = "X" ]; then 17 | HIVE_BIN=`which hive` 18 | if [ $? -ne 0 ]; then 19 | echo "Script must be run where hive in PATH or HIVE_BIN env variable is set" 20 | exit 1 21 | fi 22 | fi 23 | 24 | # Tables in the TPC-DS schema. 25 | DIMS="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site" 26 | FACTS="store_sales store_returns web_sales web_returns catalog_sales catalog_returns inventory" 27 | 28 | # Get the parameters; scale_factor is required, so print the usage and exit when it is missing. 29 | SCALE=$1 30 | if [ "X$SCALE" = "X" ]; then usage; elif [ $SCALE -eq 1 ]; then 31 | echo "Scale factor must be greater than 1" 32 | exit 1 33 | fi 34 | 35 | if [ "X$BUCKET_DATA" != "X" ]; then 36 | BUCKETS=13 37 | RETURN_BUCKETS=13 38 | else 39 | BUCKETS=1 40 | RETURN_BUCKETS=1 41 | fi 42 | if [ "X$DEBUG_SCRIPT" != "X" ]; then 43 | set -x 44 | fi 45 | 46 | # Create tables for the specified table format. 
47 | if [ "X$FORMAT" = "X" ]; then 48 | FORMAT=orc 49 | fi 50 | LOAD_FILE="load_${FORMAT}_${SCALE}.mk" 51 | SILENCE="2> /dev/null 1> /dev/null" 52 | if [ "X$DEBUG_SCRIPT" != "X" ]; then 53 | SILENCE="" 54 | fi 55 | 56 | echo -e "all: ${DIMS} ${FACTS}" > $LOAD_FILE 57 | 58 | i=1 59 | total=24 60 | SOURCE=tpcds_bin_${FORMAT}_${SCALE} 61 | DATABASE=tpcds_bin_partitioned_${FORMAT}_${SCALE} 62 | MAX_REDUCERS=2500 # maximum number of useful reducers for any scale 63 | REDUCERS=$((test ${SCALE} -gt ${MAX_REDUCERS} && echo ${MAX_REDUCERS}) || echo ${SCALE}) 64 | 65 | runcommand "$HIVE_BIN -f ddl-tpcds/bin_partitioned/create_alltables.sql --hivevar DB=${DATABASE}" 66 | 67 | for t in ${DIMS} 68 | do 69 | COMMAND="$HIVE_BIN -f ddl-tpcds/bin_partitioned/${t}.sql \ 70 | --hivevar DB=${DATABASE} --hivevar SOURCE=${SOURCE} \ 71 | --hivevar SCALE=${SCALE} \ 72 | --hivevar REDUCERS=${REDUCERS} \ 73 | --hivevar FILE=${FORMAT}" 74 | echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE 75 | i=`expr $i + 1` 76 | done 77 | for t in ${FACTS} 78 | do 79 | COMMAND="$HIVE_BIN -f ddl-tpcds/bin_partitioned/${t}.sql \ 80 | --hivevar DB=${DATABASE} \ 81 | --hivevar SCALE=${SCALE} \ 82 | --hivevar SOURCE=${SOURCE} --hivevar BUCKETS=${BUCKETS} \ 83 | --hivevar RETURN_BUCKETS=${RETURN_BUCKETS} --hivevar REDUCERS=${REDUCERS} --hivevar FILE=${FORMAT}" 84 | echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE 85 | i=`expr $i + 1` 86 | done 87 | make -j 1 -f $LOAD_FILE 88 | echo "Loading constraints" 89 | runcommand "$HIVE_BIN -f ddl-tpcds/bin/add_constraints.sql --hivevar DB=${DATABASE}" 90 | echo "Data loaded into database ${DATABASE}." 
-------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 23 | 24 | 4.0.0 25 | 26 | com.ververica 27 | flink-sql-benchmark 28 | 0.1-SNAPSHOT 29 | flink-sql-benchmark 30 | pom 31 | 32 | 33 | 1.16-SNAPSHOT 34 | 3.1.1 35 | 36 | 37 | 38 | flink-tpcds 39 | hive-tpcds-setup 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /tools/common/env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # flink-sql-benchmark install path. 4 | export INSTALL_PATH="../../../flink-sql-benchmark" 5 | export HADOOP_CLASSPATH=`hadoop classpath` 6 | 7 | export FLINK_HOME=${INSTALL_PATH}/packages/flink-1.16.0 8 | export FLINK_TEST_JAR=${INSTALL_PATH}/flink-tpcds/target/flink-tpcds-0.1-SNAPSHOT-jar-with-dependencies.jar 9 | 10 | export SCALE=10000 11 | export FLINK_TEST_DB=tpcds_bin_orc_$SCALE 12 | 13 | # If you try to run TPC-DS tests on partition table, you need to use the below environment variables. 
14 | # export FLINK_TEST_DB=tpcds_bin_partitioned_orc_$SCALE 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tools/common/flink-conf.yaml: -------------------------------------------------------------------------------- 1 | # Cluster 2 | slotmanager.number-of-slots.max: 1500 3 | taskmanager.numberOfTaskSlots: 10 4 | cluster.evenly-spread-out-slots: true 5 | 6 | # Memory 7 | jobmanager.memory.process.size: 24000m 8 | taskmanager.memory.process.size: 24000m 9 | taskmanager.network.memory.fraction: 0.2 10 | taskmanager.network.memory.max: 4096mb 11 | ## Sort shuffle 12 | taskmanager.network.memory.floating-buffers-per-gate: 4096 13 | taskmanager.network.memory.buffers-per-channel: 0 14 | 15 | # Table 16 | table.exec.hive.infer-source-parallelism: true 17 | table.optimizer.join-reorder-enabled: true 18 | ## For Tpcds 10T 19 | table.exec.hive.infer-source-parallelism.max: 1500 20 | table.exec.resource.default-parallelism: 1500 21 | table.optimizer.join.broadcast-threshold: 10485760 22 | 23 | # Basic configs, only for tests 24 | jobstore.expiration-time: 18000 25 | env.java.opts.jobmanager: -XX:+PrintGCDetails -XX:+PrintGCDateStamps 26 | env.java.opts.taskmanager: -XX:+PrintGCDetails -XX:+PrintGCDateStamps 27 | 28 | resourcemanager.taskmanager-timeout: 900000 29 | akka.ask.timeout: 10h -------------------------------------------------------------------------------- /tools/datagen/init_db_for_none_partition_tables.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | source ../common/env.sh 5 | 6 | cd ${INSTALL_PATH}/hive-tpcds-setup 7 | ./tpcds-build.sh 8 | ./tpcds-setup.sh $SCALE 9 | -------------------------------------------------------------------------------- /tools/datagen/init_db_for_partition_tables.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | source ../common/env.sh 5 | 
6 | cd ${INSTALL_PATH}/hive-tpcds-setup 7 | ./tpcds-setup_partitioned.sh $SCALE 8 | -------------------------------------------------------------------------------- /tools/flink/run_query.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ../common/env.sh 3 | num_iters=$1 4 | queryStatement=$2 5 | # $1 is the iteration count, $2 an optional query list; when $2 is empty the --queries option is omitted. 6 | if [ -z "$queryStatement" ]; then 7 | $FLINK_HOME/bin/flink run -c com.ververica.flink.benchmark.Benchmark ${FLINK_TEST_JAR} --database ${FLINK_TEST_DB} --hive_conf $HIVE_CONF_DIR --iterations $num_iters 8 | else 9 | $FLINK_HOME/bin/flink run -c com.ververica.flink.benchmark.Benchmark ${FLINK_TEST_JAR} --database ${FLINK_TEST_DB} --hive_conf $HIVE_CONF_DIR --queries $queryStatement --iterations $num_iters 10 | fi 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /tools/stats/analyze_table_stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ../common/env.sh 3 | 4 | ${FLINK_HOME}/bin/flink run -c com.ververica.flink.benchmark.AnalyzeTableRunner ${FLINK_TEST_JAR} ${FLINK_TEST_DB} --------------------------------------------------------------------------------