├── README.md ├── tpcdsitem.manifest ├── tpcdsstore.manifest ├── tpcdscustomer.manifest ├── tpcdsdatedim.manifest ├── tpcdscallcenter.manifest ├── data_load.sh ├── tpcdsdhouseholddemographics.manifest ├── datagen.sh ├── tpcdsstoresales.manifest ├── tpcdswebsales.manifest ├── tpcdscatalogsales.manifest ├── data_load.sql ├── copy_files.sh ├── tpcds_queries.sql ├── tpcdsddl.sql ├── tpcdsddl_solution.sql └── tpcds_queries_solution.sql /README.md: -------------------------------------------------------------------------------- 1 | # bigdata-with-redshift 2 | -------------------------------------------------------------------------------- /tpcdsitem.manifest: -------------------------------------------------------------------------------- 1 | { 2 | "entries": [ 3 | {"url":"s3:///item_1_8.dat", "mandatory":true} 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /tpcdsstore.manifest: -------------------------------------------------------------------------------- 1 | { 2 | "entries": [ 3 | {"url":"s3:///store_1_8.dat", "mandatory":true} 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /tpcdscustomer.manifest: -------------------------------------------------------------------------------- 1 | { 2 | "entries": [ 3 | {"url":"s3:///customer_1_8.dat", "mandatory":true} 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /tpcdsdatedim.manifest: -------------------------------------------------------------------------------- 1 | { 2 | "entries": [ 3 | {"url":"s3:///date_dim_1_8.dat", "mandatory":true} 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /tpcdscallcenter.manifest: -------------------------------------------------------------------------------- 1 | { 2 | "entries": [ 3 | {"url":"s3:///call_center_1_8.dat", "mandatory":true} 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /data_load.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export PGPASSWORD=; 3 | psql -h redshift_endpoint -U awsuser -d redshiftcourse -p 5439 -f data_load.sql 4 | -------------------------------------------------------------------------------- /tpcdsdhouseholddemographics.manifest: -------------------------------------------------------------------------------- 1 | { 2 | "entries": [ 3 | {"url":"s3:///household_demographics_1_8.dat", "mandatory":true} 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /datagen.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | cd $HOME/tpcds-kit/tools 4 | ./dsdgen -scale 150 -dir /home/ec2-user/tpcds -parallel 8 -child 1 & 5 | ./dsdgen -scale 150 -dir /home/ec2-user/tpcds -parallel 8 -child 2 & 6 | ./dsdgen -scale 150 -dir /home/ec2-user/tpcds -parallel 8 -child 3 & 7 | ./dsdgen -scale 150 -dir /home/ec2-user/tpcds -parallel 8 -child 4 & 8 | ./dsdgen -scale 150 -dir /home/ec2-user/tpcds -parallel 8 -child 5 & 9 | ./dsdgen -scale 150 -dir /home/ec2-user/tpcds -parallel 8 -child 6 & 10 | ./dsdgen -scale 150 -dir /home/ec2-user/tpcds -parallel 8 -child 7 & 11 | ./dsdgen -scale 150 -dir /home/ec2-user/tpcds -parallel 8 -child 8 & 12 | -------------------------------------------------------------------------------- /tpcdsstoresales.manifest: -------------------------------------------------------------------------------- 1 | { 2 | "entries": [ 3 | {"url":"s3:///store_sales_1_8.dat", "mandatory":true}, 4 | {"url":"s3:///store_sales_2_8.dat", "mandatory":true}, 5 | {"url":"s3:///store_sales_3_8.dat", "mandatory":true}, 6 | {"url":"s3:///store_sales_4_8.dat", "mandatory":true}, 7 | {"url":"s3:///store_sales_5_8.dat", "mandatory":true}, 8 | {"url":"s3:///store_sales_6_8.dat", "mandatory":true}, 9 | {"url":"s3:///store_sales_7_8.dat", "mandatory":true}, 10 | {"url":"s3:///store_sales_8_8.dat", "mandatory":true} 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /tpcdswebsales.manifest: -------------------------------------------------------------------------------- 1 | { 2 | "entries": [ 3 | {"url":"s3:///web_sales_1_8.dat", "mandatory":true}, 4 | {"url":"s3:///web_sales_2_8.dat", "mandatory":true}, 5 | {"url":"s3:///web_sales_3_8.dat", "mandatory":true}, 6 | {"url":"s3:///web_sales_4_8.dat", "mandatory":true}, 7 | {"url":"s3:///web_sales_5_8.dat", "mandatory":true}, 8 | {"url":"s3:///web_sales_6_8.dat", "mandatory":true}, 9 | {"url":"s3:///web_sales_7_8.dat", "mandatory":true}, 10 | {"url":"s3:///web_sales_8_8.dat", "mandatory":true} 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /tpcdscatalogsales.manifest: -------------------------------------------------------------------------------- 1 | { 2 | "entries": [ 3 | {"url":"s3:///catalog_sales_1_8.dat", "mandatory":true}, 4 | {"url":"s3:///catalog_sales_2_8.dat", "mandatory":true}, 5 | {"url":"s3:///catalog_sales_3_8.dat", "mandatory":true}, 6 | {"url":"s3:///catalog_sales_4_8.dat", "mandatory":true}, 7 | {"url":"s3:///catalog_sales_5_8.dat", "mandatory":true}, 8 | {"url":"s3:///catalog_sales_6_8.dat", "mandatory":true}, 9 | {"url":"s3:///catalog_sales_7_8.dat", "mandatory":true}, 10 | {"url":"s3:///catalog_sales_8_8.dat", "mandatory":true} 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /data_load.sql: -------------------------------------------------------------------------------- 1 | copy call_center from 's3://yourbucketname/manifest/tpcdscallcenter.manifest' iam_role 'arn:aws:iam:::role/' manifest; 2 | copy catalog_sales from 's3://yourbucketname/manifest/tpcdscatalogsales.manifest' iam_role 'arn:aws:iam:::role/' manifest; 3 | copy customer from 's3://yourbucketname/manifest/tpcdscustomer.manifest' iam_role 'arn:aws:iam:::role/' ACCEPTINVCHARS manifest; 4 | copy date_dim from 's3://yourbucketname/manifest/tpcdsdatedim.manifest' iam_role 'arn:aws:iam:::role/' manifest; 5 | copy household_demographics from 's3://yourbucketname/manifest/tpcdsdhouseholddemographics.manifest' iam_role 'arn:aws:iam:::role/' manifest; 6 | copy item from 's3://yourbucketname/manifest/tpcdsitem.manifest' iam_role 'arn:aws:iam:::role/' manifest; 7 | copy store from 's3://yourbucketname/manifest/tpcdsstore.manifest' iam_role 'arn:aws:iam:::role/' manifest; 8 | copy store_sales from 's3://yourbucketname/manifest/tpcdsstoresales.manifest' iam_role 'arn:aws:iam:::role/' manifest; 9 | copy web_sales from 's3://yourbucketname/manifest/tpcdswebsales.manifest' iam_role 'arn:aws:iam:::role/' manifest; 10 | -------------------------------------------------------------------------------- /copy_files.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | cd $HOME/tpcds 4 | aws s3 cp call_center_1_8.dat s3:///call_center_1_8.dat 5 | aws s3 cp catalog_sales_1_8.dat s3:///catalog_sales_1_8.dat 6 | aws s3 cp catalog_sales_2_8.dat s3:///catalog_sales_2_8.dat 7 | aws s3 cp catalog_sales_3_8.dat s3:///catalog_sales_3_8.dat 8 | aws s3 cp catalog_sales_4_8.dat s3:///catalog_sales_4_8.dat 9 | aws s3 cp catalog_sales_5_8.dat s3:///catalog_sales_5_8.dat 10 | aws s3 cp catalog_sales_6_8.dat s3:///catalog_sales_6_8.dat 11 | aws s3 cp catalog_sales_7_8.dat s3:///catalog_sales_7_8.dat 12 | aws s3 cp catalog_sales_8_8.dat s3:///catalog_sales_8_8.dat 13 | aws s3 cp customer_1_8.dat s3:///customer_1_8.dat 14 | aws s3 cp date_dim_1_8.dat s3:///date_dim_1_8.dat 15 | aws s3 cp household_demographics_1_8.dat s3:///household_demographics_1_8.dat 16 | aws s3 cp item_1_8.dat s3:///item_1_8.dat 17 | aws s3 cp store_1_8.dat s3:///store_1_8.dat 18 | aws s3 cp store_sales_1_8.dat s3:///store_sales_1_8.dat 19 | aws s3 cp store_sales_2_8.dat s3:///store_sales_2_8.dat 20 | aws s3 cp store_sales_3_8.dat s3:///store_sales_3_8.dat 21 | aws s3 cp store_sales_4_8.dat s3:///store_sales_4_8.dat 22 | aws s3 cp store_sales_5_8.dat s3:///store_sales_5_8.dat 23 | aws s3 cp store_sales_6_8.dat s3:///store_sales_6_8.dat 24 | aws s3 cp store_sales_7_8.dat s3:///store_sales_7_8.dat 25 | aws s3 cp store_sales_8_8.dat s3:///store_sales_8_8.dat 26 | aws s3 cp web_sales_1_8.dat s3:///web_sales_1_8.dat 27 | aws s3 cp web_sales_2_8.dat s3:///web_sales_2_8.dat 28 | aws s3 cp web_sales_3_8.dat s3:///web_sales_3_8.dat 29 | aws s3 cp web_sales_4_8.dat s3:///web_sales_4_8.dat 30 | aws s3 cp web_sales_5_8.dat s3:///web_sales_5_8.dat 31 | aws s3 cp web_sales_6_8.dat s3:///web_sales_6_8.dat 32 | aws s3 cp web_sales_7_8.dat s3:///web_sales_7_8.dat 33 | aws s3 cp web_sales_8_8.dat s3:///web_sales_8_8.dat 34 | -------------------------------------------------------------------------------- /tpcds_queries.sql: -------------------------------------------------------------------------------- 1 | -- These tpc-ds queries have been edited slightly from the output of dsdqgen program. 2 | 3 | -- start query 34 in stream 0 using template query34.tpl 4 | -- Display all customers with specific buy potentials and whose dependent count to vehicle count ratio is larger 5 | -- than 1.2, who in three consecutive years made purchases with between 15 and 20 items in the beginning or the 6 | -- end of each month in stores located in 8 counties. 7 | select c_last_name 8 | ,c_first_name 9 | ,c_salutation 10 | ,c_preferred_cust_flag 11 | ,ss_ticket_number 12 | ,cnt from 13 | (select ss_ticket_number 14 | ,ss_customer_sk 15 | ,count(*) cnt 16 | from store_sales,date_dim,store,household_demographics 17 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 18 | and store_sales.ss_store_sk = store.s_store_sk 19 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 20 | and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) 21 | and (household_demographics.hd_buy_potential = '501-1000' or 22 | household_demographics.hd_buy_potential = '0-500') 23 | and household_demographics.hd_vehicle_count > 0 24 | and (case when household_demographics.hd_vehicle_count > 0 25 | then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count 26 | else null 27 | end) > 1.2 28 | and date_dim.d_year in (1998,1998+1,1998+2) 29 | and store.s_county in ('Williamson County','Ziebach County','Ziebach County','Williamson County', 30 | 'Ziebach County','Ziebach County','Ziebach County','Ziebach County') 31 | group by ss_ticket_number,ss_customer_sk) dn,customer 32 | where ss_customer_sk = c_customer_sk 33 | and cnt between 15 and 20 34 | order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number; 35 | 36 | 37 | -- start query 11 in stream 0 using template query11.tpl 38 | --Find customers whose increase in spending was large over the web than in stores this year compared to last 39 | --year. 40 | 41 | with year_total as ( 42 | select c_customer_id customer_id 43 | ,c_first_name customer_first_name 44 | ,c_last_name customer_last_name 45 | ,c_preferred_cust_flag customer_preferred_cust_flag 46 | ,c_birth_country customer_birth_country 47 | ,c_login customer_login 48 | ,c_email_address customer_email_address 49 | ,d_year dyear 50 | ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total 51 | ,'s' sale_type 52 | from customer 53 | ,store_sales 54 | ,date_dim 55 | where c_customer_sk = ss_customer_sk 56 | and ss_sold_date_sk = d_date_sk 57 | group by c_customer_id 58 | ,c_first_name 59 | ,c_last_name 60 | ,c_preferred_cust_flag 61 | ,c_birth_country 62 | ,c_login 63 | ,c_email_address 64 | ,d_year 65 | union all 66 | select c_customer_id customer_id 67 | ,c_first_name customer_first_name 68 | ,c_last_name customer_last_name 69 | ,c_preferred_cust_flag customer_preferred_cust_flag 70 | ,c_birth_country customer_birth_country 71 | ,c_login customer_login 72 | ,c_email_address customer_email_address 73 | ,d_year dyear 74 | ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total 75 | ,'w' sale_type 76 | from customer 77 | ,web_sales 78 | ,date_dim 79 | where c_customer_sk = ws_bill_customer_sk 80 | and ws_sold_date_sk = d_date_sk 81 | group by c_customer_id 82 | ,c_first_name 83 | ,c_last_name 84 | ,c_preferred_cust_flag 85 | ,c_birth_country 86 | ,c_login 87 | ,c_email_address 88 | ,d_year 89 | ) 90 | select 91 | t_s_secyear.customer_id 92 | ,t_s_secyear.customer_first_name 93 | ,t_s_secyear.customer_last_name 94 | ,t_s_secyear.customer_birth_country 95 | from year_total t_s_firstyear 96 | ,year_total t_s_secyear 97 | ,year_total t_w_firstyear 98 | ,year_total t_w_secyear 99 | where t_s_secyear.customer_id = t_s_firstyear.customer_id 100 | and t_s_firstyear.customer_id = t_w_secyear.customer_id 101 | and t_s_firstyear.customer_id = t_w_firstyear.customer_id 102 | and t_s_firstyear.sale_type = 's' 103 | and t_w_firstyear.sale_type = 'w' 104 | and t_s_secyear.sale_type = 's' 105 | and t_w_secyear.sale_type = 'w' 106 | and t_s_firstyear.dyear = 1998 107 | and t_s_secyear.dyear = 1998+1 108 | and t_w_firstyear.dyear = 1998 109 | and t_w_secyear.dyear = 1998+1 110 | and t_s_firstyear.year_total > 0 111 | and t_w_firstyear.year_total > 0 112 | and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end 113 | > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end 114 | order by t_s_secyear.customer_id 115 | ,t_s_secyear.customer_first_name 116 | ,t_s_secyear.customer_last_name 117 | ,t_s_secyear.customer_birth_country 118 | ; 119 | 120 | -- query 57 121 | --Find the item brands and categories for each call center and their monthly sales figures for a specified year, 122 | -- where the monthly sales figure deviated more than 10% of the average monthly sales for the year, sorted by 123 | -- deviation and call center. Report the sales deviation from the previous and following month. 124 | -- query 57 (edited) 125 | 126 | select i_category, i_brand, cc_name, d_year, d_moy, 127 | sum(cs_sales_price) sum_sales, 128 | avg(sum(cs_sales_price)) over 129 | (partition by i_category, i_brand, 130 | cc_name, d_year) 131 | avg_monthly_sales, 132 | rank() over 133 | (partition by i_category, i_brand, 134 | cc_name 135 | order by d_year, d_moy) rn 136 | from item, catalog_sales, date_dim , call_center 137 | where cs_item_sk = i_item_sk and 138 | cs_sold_date_sk = d_date_sk and 139 | cc_call_center_sk= cs_call_center_sk 140 | group by i_category, i_brand, 141 | cc_name , d_year, d_moy; 142 | -------------------------------------------------------------------------------- /tpcdsddl.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE public.call_center ( 2 | cc_call_center_sk int8 NOT NULL, 3 | cc_call_center_id char(16) NOT NULL, 4 | cc_rec_start_date date, 5 | cc_rec_end_date date, 6 | cc_closed_date_sk int8, 7 | cc_open_date_sk int8, 8 | cc_name varchar(50), 9 | cc_class varchar(50), 10 | cc_employees int8, 11 | cc_sq_ft int8, 12 | cc_hours varchar(20), 13 | cc_manager varchar(40), 14 | cc_mkt_id int8, 15 | cc_mkt_class varchar(50), 16 | cc_mkt_desc varchar(100), 17 | cc_market_manager varchar(40), 18 | cc_division int8, 19 | cc_division_name varchar(50), 20 | cc_company int8, 21 | cc_company_name varchar(50), 22 | cc_street_number varchar(10), 23 | cc_street_name varchar(60), 24 | cc_street_type varchar(15), 25 | cc_suite_number varchar(10), 26 | cc_city varchar(60), 27 | cc_county varchar(30), 28 | cc_state char(2), 29 | cc_zip char(10), 30 | cc_country varchar(20), 31 | cc_gmt_offset numeric(5,2), 32 | cc_tax_percentage numeric(5,2) 33 | ); 34 | 35 | CREATE TABLE public.catalog_sales ( 36 | cs_sold_date_sk int8, 37 | cs_sold_time_sk int8, 38 | cs_ship_date_sk int8, 39 | cs_bill_customer_sk int8, 40 | cs_bill_cdemo_sk int8, 41 | cs_bill_hdemo_sk int8, 42 | cs_bill_addr_sk int8, 43 | cs_ship_customer_sk int8, 44 | cs_ship_cdemo_sk int8, 45 | cs_ship_hdemo_sk int8, 46 | cs_ship_addr_sk int8, 47 | cs_call_center_sk int8, 48 | cs_catalog_page_sk int8, 49 | cs_ship_mode_sk int8, 50 | cs_warehouse_sk int8, 51 | cs_item_sk int8 NOT NULL, 52 | cs_promo_sk int8, 53 | cs_order_number int8 NOT NULL, 54 | cs_quantity int8, 55 | cs_wholesale_cost numeric(7,2), 56 | cs_list_price numeric(7,2), 57 | cs_sales_price numeric(7,2), 58 | cs_ext_discount_amt numeric(7,2), 59 | cs_ext_sales_price numeric(7,2), 60 | cs_ext_wholesale_cost numeric(7,2), 61 | cs_ext_list_price numeric(7,2), 62 | cs_ext_tax numeric(7,2), 63 | cs_coupon_amt numeric(7,2), 64 | cs_ext_ship_cost numeric(7,2), 65 | cs_net_paid numeric(7,2), 66 | cs_net_paid_inc_tax numeric(7,2), 67 | cs_net_paid_inc_ship numeric(7,2), 68 | cs_net_paid_inc_ship_tax numeric(7,2), 69 | cs_net_profit numeric(7,2) 70 | ); 71 | 72 | CREATE TABLE public.customer ( 73 | c_customer_sk int8 NOT NULL, 74 | c_customer_id char(16) NOT NULL, 75 | c_current_cdemo_sk int8, 76 | c_current_hdemo_sk int8, 77 | c_current_addr_sk int8, 78 | c_first_shipto_date_sk int8, 79 | c_first_sales_date_sk int8, 80 | c_salutation char(10), 81 | c_first_name char(20), 82 | c_last_name char(30), 83 | c_preferred_cust_flag char(1), 84 | c_birth_day int8, 85 | c_birth_month int8, 86 | c_birth_year int8, 87 | c_birth_country varchar(20), 88 | c_login char(13), 89 | c_email_address char(50), 90 | c_last_review_date int8 91 | ); 92 | 93 | CREATE TABLE public.date_dim ( 94 | d_date_sk int8 NOT NULL, 95 | d_date_id char(16) NOT NULL, 96 | d_date date, 97 | d_month_seq int8, 98 | d_week_seq int8, 99 | d_quarter_seg int8, 100 | d_year int8, 101 | d_dow int8, 102 | d_moy int8, 103 | d_dom int8, 104 | d_qoy int8, 105 | d_fy_year int8, 106 | d_fy_quarter_seq int8, 107 | d_fy_week_seq int8, 108 | d_day_name char(9), 109 | d_quarter_name char(6), 110 | d_holiday char(1), 111 | d_weekend char(1), 112 | d_following_holiday char(1), 113 | d_first_dom int8, 114 | d_last_dom int8, 115 | d_same_day_1y int8, 116 | d_same_day_1q int8, 117 | d_current_day char(1), 118 | d_current_week char(1), 119 | d_current_month char(1), 120 | d_current_quarter char(1), 121 | d_current_year char(1) 122 | ); 123 | 124 | CREATE TABLE public.household_demographics ( 125 | hd_demo_sk int8 NOT NULL, 126 | hd_income_band_sk int8 NOT NULL, 127 | hd_buy_potential char(15), 128 | hd_dep_count int8, 129 | hd_vehicle_count int8 130 | ); 131 | 132 | CREATE TABLE public.item ( 133 | i_item_sk int8 NOT NULL, 134 | i_item_id char(16) NOT NULL, 135 | i_rec_start_date date, 136 | i_rec_end_date date, 137 | i_item_desc varchar(200), 138 | i_current_price numeric(7,2), 139 | i_wholesale_cost numeric(7,2), 140 | i_brand_id int8, 141 | i_brand char(50), 142 | i_class_id int8, 143 | i_class char(50), 144 | i_category_id int8, 145 | i_category char(50), 146 | i_manufact_id int8, 147 | i_manufact char(50), 148 | i_size char(20), 149 | i_formulation char(20), 150 | i_color char(20), 151 | i_units char(10), 152 | i_container char(10), 153 | i_manager_id int8, 154 | i_product_name char(50) 155 | ); 156 | 157 | CREATE TABLE public.store ( 158 | s_store_sk int8 NOT NULL, 159 | s_store_id char(16) NOT NULL, 160 | s_rec_start_date date, 161 | s_rec_end_date date, 162 | s_closed_date_sk int8, 163 | s_store_name varchar(50), 164 | s_number_employees int8, 165 | s_floor_space int8, 166 | s_hours char(20), 167 | s_manager varchar(40), 168 | s_market_id int8, 169 | s_geography_class varchar(100), 170 | s_market_desc varchar(100), 171 | s_market_manager varchar(40), 172 | s_division_id int8, 173 | s_division_name varchar(40), 174 | s_company_id int8, 175 | s_company_name varchar(50), 176 | s_street_number varchar(10), 177 | s_street_name varchar(60), 178 | s_street_type varchar(15), 179 | s_suite_number varchar(10), 180 | s_city varchar(60), 181 | s_county varchar(30), 182 | s_state char(2), 183 | s_zip char(10), 184 | s_country varchar(20), 185 | s_gmt_offset numeric(5,2), 186 | s_tax_percentage numeric(5,2) 187 | ); 188 | 189 | CREATE TABLE public.store_sales ( 190 | ss_sold_date_sk int8, 191 | ss_sold_time_sk int8, 192 | ss_item_sk int8 NOT NULL, 193 | ss_customer_sk int8, 194 | ss_cdemo_sk int8, 195 | ss_hdemo_sk int8, 196 | ss_addr_sk int8, 197 | ss_store_sk int8, 198 | ss_promo_sk int8, 199 | ss_ticket_number int8 NOT NULL, 200 | ss_quantity int8, 201 | ss_wholesale_cost numeric(7,2), 202 | ss_list_price numeric(7,2), 203 | ss_sales_price numeric(7,2), 204 | ss_ext_discount_amt numeric(7,2), 205 | ss_ext_sales_price numeric(7,2), 206 | ss_ext_wholesale_cost numeric(7,2), 207 | ss_ext_list_price numeric(7,2), 208 | ss_ext_tax numeric(7,2), 209 | ss_coupon_amt numeric(7,2), 210 | ss_net_paid numeric(7,2), 211 | ss_net_paid_inc_tax numeric(7,2), 212 | ss_net_profit numeric(7,2) 213 | ); 214 | 215 | CREATE TABLE public.web_sales ( 216 | ws_sold_date_sk int8, 217 | ws_sold_time_sk int8, 218 | ws_ship_date_sk int8, 219 | ws_item_sk int8 NOT NULL, 220 | ws_bill_customer_sk int8, 221 | ws_bill_cdemo_sk int8, 222 | ws_bill_hdemo_sk int8, 223 | ws_bill_addr_sk int8, 224 | ws_ship_customer_sk int8, 225 | ws_ship_cdemo_sk int8, 226 | ws_ship_hdemo_sk int8, 227 | ws_ship_addr_sk int8, 228 | ws_web_page_sk int8, 229 | ws_web_site_sk int8, 230 | ws_ship_mode_sk int8, 231 | ws_warehouse_sk int8, 232 | ws_promo_sk int8, 233 | ws_order_number int8 NOT NULL, 234 | ws_quantity int8, 235 | ws_wholesale_cost numeric(7,2), 236 | ws_list_price numeric(7,2), 237 | ws_sales_price numeric(7,2), 238 | ws_ext_discount_amt numeric(7,2), 239 | ws_ext_sales_price numeric(7,2), 240 | ws_ext_wholesale_cost numeric(7,2), 241 | ws_ext_list_price numeric(7,2), 242 | ws_ext_tax numeric(7,2), 243 | ws_coupon_amt numeric(7,2), 244 | ws_ext_ship_cost numeric(7,2), 245 | ws_net_paid numeric(7,2), 246 | ws_net_paid_inc_tax numeric(7,2), 247 | ws_net_paid_inc_ship numeric(7,2), 248 | ws_net_paid_inc_ship_tax numeric(7,2), 249 | ws_net_profit numeric(7,2) 250 | ); 251 | -------------------------------------------------------------------------------- /tpcdsddl_solution.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE public.call_center_all ( 2 | cc_call_center_sk int8 NOT NULL, 3 | cc_call_center_id char(16) NOT NULL, 4 | cc_rec_start_date date, 5 | cc_rec_end_date date, 6 | cc_closed_date_sk int8, 7 | cc_open_date_sk int8, 8 | cc_name varchar(50), 9 | cc_class varchar(50), 10 | cc_employees int8, 11 | cc_sq_ft int8, 12 | cc_hours varchar(20), 13 | cc_manager varchar(40), 14 | cc_mkt_id int8, 15 | cc_mkt_class varchar(50), 16 | cc_mkt_desc varchar(100), 17 | cc_market_manager varchar(40), 18 | cc_division int8, 19 | cc_division_name varchar(50), 20 | cc_company int8, 21 | cc_company_name varchar(50), 22 | cc_street_number varchar(10), 23 | cc_street_name varchar(60), 24 | cc_street_type varchar(15), 25 | cc_suite_number varchar(10), 26 | cc_city varchar(60), 27 | cc_county varchar(30), 28 | cc_state char(2), 29 | cc_zip char(10), 30 | cc_country varchar(20), 31 | cc_gmt_offset numeric(5,2), 32 | cc_tax_percentage numeric(5,2) 33 | ) diststyle all; 34 | 35 | CREATE TABLE public.date_dim_all ( 36 | d_date_sk int8 NOT NULL, 37 | d_date_id char(16) NOT NULL, 38 | d_date date, 39 | d_month_seq int8, 40 | d_week_seq int8, 41 | d_quarter_seg int8, 42 | d_year int8, 43 | d_dow int8, 44 | d_moy int8, 45 | d_dom int8, 46 | d_qoy int8, 47 | d_fy_year int8, 48 | d_fy_quarter_seq int8, 49 | d_fy_week_seq int8, 50 | d_day_name char(9), 51 | d_quarter_name char(6), 52 | d_holiday char(1), 53 | d_weekend char(1), 54 | d_following_holiday char(1), 55 | d_first_dom int8, 56 | d_last_dom int8, 57 | d_same_day_1y int8, 58 | d_same_day_1q int8, 59 | d_current_day char(1), 60 | d_current_week char(1), 61 | d_current_month char(1), 62 | d_current_quarter char(1), 63 | d_current_year char(1) 64 | ) diststyle all; 65 | 66 | CREATE TABLE public.item_all ( 67 | i_item_sk int8 NOT NULL, 68 | i_item_id char(16) NOT NULL, 69 | i_rec_start_date date, 70 | i_rec_end_date date, 71 | i_item_desc varchar(200), 72 | i_current_price numeric(7,2), 73 | i_wholesale_cost numeric(7,2), 74 | i_brand_id int8, 75 | i_brand char(50), 76 | i_class_id int8, 77 | i_class char(50), 78 | i_category_id int8, 79 | i_category char(50), 80 | i_manufact_id int8, 81 | i_manufact char(50), 82 | i_size char(20), 83 | i_formulation char(20), 84 | i_color char(20), 85 | i_units char(10), 86 | i_container char(10), 87 | i_manager_id int8, 88 | i_product_name char(50) 89 | ) diststyle all; 90 | 91 | CREATE TABLE public.catalog_sales_dist ( 92 | cs_sold_date_sk int8, 93 | cs_sold_time_sk int8, 94 | cs_ship_date_sk int8, 95 | cs_bill_customer_sk int8, 96 | cs_bill_cdemo_sk int8, 97 | cs_bill_hdemo_sk int8, 98 | cs_bill_addr_sk int8, 99 | cs_ship_customer_sk int8, 100 | cs_ship_cdemo_sk int8, 101 | cs_ship_hdemo_sk int8, 102 | cs_ship_addr_sk int8, 103 | cs_call_center_sk int8, 104 | cs_catalog_page_sk int8, 105 | cs_ship_mode_sk int8, 106 | cs_warehouse_sk int8, 107 | cs_item_sk int8 NOT NULL DISTKEY, 108 | cs_promo_sk int8, 109 | cs_order_number int8 NOT NULL, 110 | cs_quantity int8, 111 | cs_wholesale_cost numeric(7,2), 112 | cs_list_price numeric(7,2), 113 | cs_sales_price numeric(7,2), 114 | cs_ext_discount_amt numeric(7,2), 115 | cs_ext_sales_price numeric(7,2), 116 | cs_ext_wholesale_cost numeric(7,2), 117 | cs_ext_list_price numeric(7,2), 118 | cs_ext_tax numeric(7,2), 119 | cs_coupon_amt numeric(7,2), 120 | cs_ext_ship_cost numeric(7,2), 121 | cs_net_paid numeric(7,2), 122 | cs_net_paid_inc_tax numeric(7,2), 123 | cs_net_paid_inc_ship numeric(7,2), 124 | cs_net_paid_inc_ship_tax numeric(7,2), 125 | cs_net_profit numeric(7,2) 126 | ); 127 | 128 | CREATE TABLE public.customer_all ( 129 | c_customer_sk int8 NOT NULL, 130 | c_customer_id char(16) NOT NULL, 131 | c_current_cdemo_sk int8, 132 | c_current_hdemo_sk int8, 133 | c_current_addr_sk int8, 134 | c_first_shipto_date_sk int8, 135 | c_first_sales_date_sk int8, 136 | c_salutation char(10), 137 | c_first_name char(20), 138 | c_last_name char(30), 139 | c_preferred_cust_flag char(1), 140 | c_birth_day int8, 141 | c_birth_month int8, 142 | c_birth_year int8, 143 | c_birth_country varchar(20), 144 | c_login char(13), 145 | c_email_address char(50), 146 | c_last_review_date int8 147 | ) diststyle all; 148 | 149 | CREATE TABLE public.household_demographics_all ( 150 | hd_demo_sk int8 NOT NULL, 151 | hd_income_band_sk int8 NOT NULL, 152 | hd_buy_potential char(15), 153 | hd_dep_count int8, 154 | hd_vehicle_count int8 155 | ) diststyle all; 156 | 157 | CREATE TABLE public.store_all ( 158 | s_store_sk int8 NOT NULL, 159 | s_store_id char(16) NOT NULL, 160 | s_rec_start_date date, 161 | s_rec_end_date date, 162 | s_closed_date_sk int8, 163 | s_store_name varchar(50), 164 | s_number_employees int8, 165 | s_floor_space int8, 166 | s_hours char(20), 167 | s_manager varchar(40), 168 | s_market_id int8, 169 | s_geography_class varchar(100), 170 | s_market_desc varchar(100), 171 | s_market_manager varchar(40), 172 | s_division_id int8, 173 | s_division_name varchar(40), 174 | s_company_id int8, 175 | s_company_name varchar(50), 176 | s_street_number varchar(10), 177 | s_street_name varchar(60), 178 | s_street_type varchar(15), 179 | s_suite_number varchar(10), 180 | s_city varchar(60), 181 | s_county varchar(30), 182 | s_state char(2), 183 | s_zip char(10), 184 | s_country varchar(20), 185 | s_gmt_offset numeric(5,2), 186 | s_tax_percentage numeric(5,2) 187 | ) diststyle all; 188 | 189 | CREATE TABLE public.store_sales_dist ( 190 | ss_sold_date_sk int8 sortkey, 191 | ss_sold_time_sk int8, 192 | ss_item_sk int8 NOT NULL, 193 | ss_customer_sk int8, 194 | ss_cdemo_sk int8, 195 | ss_hdemo_sk int8, 196 | ss_addr_sk int8, 197 | ss_store_sk int8 distkey, 198 | ss_promo_sk int8, 199 | ss_ticket_number int8 NOT NULL, 200 | ss_quantity int8, 201 | ss_wholesale_cost numeric(7,2), 202 | ss_list_price numeric(7,2), 203 | ss_sales_price numeric(7,2), 204 | ss_ext_discount_amt numeric(7,2), 205 | ss_ext_sales_price numeric(7,2), 206 | ss_ext_wholesale_cost numeric(7,2), 207 | ss_ext_list_price numeric(7,2), 208 | ss_ext_tax numeric(7,2), 209 | ss_coupon_amt numeric(7,2), 210 | ss_net_paid numeric(7,2), 211 | ss_net_paid_inc_tax numeric(7,2), 212 | ss_net_profit numeric(7,2)); 213 | 214 | 215 | CREATE TABLE public.store_sales_dist_sort ( 216 | ss_sold_date_sk int8 sortkey, 217 | ss_sold_time_sk int8, 218 | ss_item_sk int8 NOT NULL, 219 | ss_customer_sk int8 distkey, 220 | ss_cdemo_sk int8, 221 | ss_hdemo_sk int8, 222 | ss_addr_sk int8, 223 | ss_store_sk int8, 224 | ss_promo_sk int8, 225 | ss_ticket_number int8 NOT NULL, 226 | ss_quantity int8, 227 | ss_wholesale_cost numeric(7,2), 228 | ss_list_price numeric(7,2), 229 | ss_sales_price numeric(7,2), 230 | ss_ext_discount_amt numeric(7,2), 231 | ss_ext_sales_price numeric(7,2), 232 | ss_ext_wholesale_cost numeric(7,2), 233 | ss_ext_list_price numeric(7,2), 234 | ss_ext_tax numeric(7,2), 235 | ss_coupon_amt numeric(7,2), 236 | ss_net_paid numeric(7,2), 237 | ss_net_paid_inc_tax numeric(7,2), 238 | ss_net_profit numeric(7,2)); 239 | 240 | CREATE TABLE public.web_sales_dist_sort ( 241 | ws_sold_date_sk int8 sortkey, 242 | ws_sold_time_sk int8, 243 | ws_ship_date_sk int8, 244 | ws_item_sk int8 NOT NULL, 245 | ws_bill_customer_sk int8 distkey, 246 | ws_bill_cdemo_sk int8, 247 | ws_bill_hdemo_sk int8, 248 | ws_bill_addr_sk int8, 249 | ws_ship_customer_sk int8, 250 | ws_ship_cdemo_sk int8, 251 | ws_ship_hdemo_sk int8, 252 | ws_ship_addr_sk int8, 253 | ws_web_page_sk int8, 254 | ws_web_site_sk int8, 255 | ws_ship_mode_sk int8, 256 | ws_warehouse_sk int8, 257 | ws_promo_sk int8, 258 | ws_order_number int8 NOT NULL, 259 | ws_quantity int8, 260 | ws_wholesale_cost numeric(7,2), 261 | ws_list_price numeric(7,2), 262 | ws_sales_price numeric(7,2), 263 | ws_ext_discount_amt numeric(7,2), 264 | ws_ext_sales_price numeric(7,2), 265 | ws_ext_wholesale_cost numeric(7,2), 266 | ws_ext_list_price numeric(7,2), 267 | ws_ext_tax numeric(7,2), 268 | ws_coupon_amt numeric(7,2), 269 | ws_ext_ship_cost numeric(7,2), 270 | ws_net_paid numeric(7,2), 271 | ws_net_paid_inc_tax numeric(7,2), 272 | ws_net_paid_inc_ship numeric(7,2), 273 | ws_net_paid_inc_ship_tax numeric(7,2), 274 | ws_net_profit numeric(7,2)); 275 | -------------------------------------------------------------------------------- /tpcds_queries_solution.sql: -------------------------------------------------------------------------------- 1 | -- These tpc-ds queries have been edited slightly from the output of dsdqgen program. 2 | 3 | -- start query 34 in stream 0 using template query34.tpl 4 | -- referencing store_sales_dist,date_dim_all,store_all,household_demographics_all 5 | select c_last_name 6 | ,c_first_name 7 | ,c_salutation 8 | ,c_preferred_cust_flag 9 | ,ss_ticket_number 10 | ,cnt from 11 | (select ss_ticket_number 12 | ,ss_customer_sk 13 | ,count(*) cnt 14 | from store_sales_dist,date_dim_all,store_all,household_demographics_all 15 | where store_sales_dist.ss_sold_date_sk = date_dim_all.d_date_sk 16 | and store_sales_dist.ss_store_sk = store_all.s_store_sk 17 | and store_sales_dist.ss_hdemo_sk = household_demographics_all.hd_demo_sk 18 | and (date_dim_all.d_dom between 1 and 3 or date_dim_all.d_dom between 25 and 28) 19 | and (household_demographics_all.hd_buy_potential = '501-1000' or 20 | household_demographics_all.hd_buy_potential = '0-500') 21 | and household_demographics_all.hd_vehicle_count > 0 22 | and (case when household_demographics_all.hd_vehicle_count > 0 23 | then household_demographics_all.hd_dep_count/ household_demographics_all.hd_vehicle_count 24 | else null 25 | end) > 1.2 26 | and date_dim_all.d_year in (1998,1998+1,1998+2) 27 | and store_all.s_county in ('Williamson County','Ziebach County','Ziebach County','Williamson County', 28 | 'Ziebach County','Ziebach County','Ziebach County','Ziebach County') 29 | group by ss_ticket_number,ss_customer_sk) dn,customer 30 | where ss_customer_sk = c_customer_sk 31 | and cnt between 15 and 20 32 | order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number; 33 | 34 | -- start query 11 in stream 0 using template query11.tpl 35 | --Find customers whose increase in spending was large over the web than in stores this year compared to last 36 | --year. 37 | -- referencing customer_all and date_dim_all 38 | 39 | with year_total as ( 40 | select c_customer_id customer_id 41 | ,c_first_name customer_first_name 42 | ,c_last_name customer_last_name 43 | ,c_preferred_cust_flag customer_preferred_cust_flag 44 | ,c_birth_country customer_birth_country 45 | ,c_login customer_login 46 | ,c_email_address customer_email_address 47 | ,d_year dyear 48 | ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total 49 | ,'s' sale_type 50 | from customer_all 51 | ,store_sales 52 | ,date_dim_all 53 | where c_customer_sk = ss_customer_sk 54 | and ss_sold_date_sk = d_date_sk 55 | group by c_customer_id 56 | ,c_first_name 57 | ,c_last_name 58 | ,c_preferred_cust_flag 59 | ,c_birth_country 60 | ,c_login 61 | ,c_email_address 62 | ,d_year 63 | union all 64 | select c_customer_id customer_id 65 | ,c_first_name customer_first_name 66 | ,c_last_name customer_last_name 67 | ,c_preferred_cust_flag customer_preferred_cust_flag 68 | ,c_birth_country customer_birth_country 69 | ,c_login customer_login 70 | ,c_email_address customer_email_address 71 | ,d_year dyear 72 | ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total 73 | ,'w' sale_type 74 | from customer_all 75 | ,web_sales 76 | ,date_dim_all 77 | where c_customer_sk = ws_bill_customer_sk 78 | and ws_sold_date_sk = d_date_sk 79 | group by c_customer_id 80 | ,c_first_name 81 | ,c_last_name 82 | ,c_preferred_cust_flag 83 | ,c_birth_country 84 | ,c_login 85 | ,c_email_address 86 | ,d_year 87 | ) 88 | select 89 | t_s_secyear.customer_id 90 | ,t_s_secyear.customer_first_name 91 | ,t_s_secyear.customer_last_name 92 | ,t_s_secyear.customer_birth_country 93 | from year_total t_s_firstyear 94 | ,year_total t_s_secyear 95 | ,year_total t_w_firstyear 96 | ,year_total t_w_secyear 97 | where t_s_secyear.customer_id = t_s_firstyear.customer_id 98 | and t_s_firstyear.customer_id = t_w_secyear.customer_id 99 | and t_s_firstyear.customer_id = t_w_firstyear.customer_id 100 | and t_s_firstyear.sale_type = 's' 101 | and t_w_firstyear.sale_type = 'w' 102 | and t_s_secyear.sale_type = 's' 103 | and t_w_secyear.sale_type = 'w' 104 | and t_s_firstyear.dyear = 1998 105 | and t_s_secyear.dyear = 1998+1 106 | and t_w_firstyear.dyear = 1998 107 | and t_w_secyear.dyear = 1998+1 108 | and t_s_firstyear.year_total > 0 109 | and t_w_firstyear.year_total > 0 110 | and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end 111 | > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end 112 | order by t_s_secyear.customer_id 113 | ,t_s_secyear.customer_first_name 114 | ,t_s_secyear.customer_last_name 115 | ,t_s_secyear.customer_birth_country 116 | ; 117 | 118 | - start query 11 in stream 0 using template query11.tpl 119 | --Find customers whose increase in spending was large over the web than in stores this year compared to last 120 | --year. 121 | -- referencing customer_all, date_dim_all, store_sales_dist_sort and web_sales_dist_sort 122 | 123 | with year_total as ( 124 | select c_customer_id customer_id 125 | ,c_first_name customer_first_name 126 | ,c_last_name customer_last_name 127 | ,c_preferred_cust_flag customer_preferred_cust_flag 128 | ,c_birth_country customer_birth_country 129 | ,c_login customer_login 130 | ,c_email_address customer_email_address 131 | ,d_year dyear 132 | ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total 133 | ,'s' sale_type 134 | from customer_all 135 | ,store_sales_dist_sort 136 | ,date_dim_all 137 | where c_customer_sk = ss_customer_sk 138 | and ss_sold_date_sk = d_date_sk 139 | group by c_customer_id 140 | ,c_first_name 141 | ,c_last_name 142 | ,c_preferred_cust_flag 143 | ,c_birth_country 144 | ,c_login 145 | ,c_email_address 146 | ,d_year 147 | union all 148 | select c_customer_id customer_id 149 | ,c_first_name customer_first_name 150 | ,c_last_name customer_last_name 151 | ,c_preferred_cust_flag customer_preferred_cust_flag 152 | ,c_birth_country customer_birth_country 153 | ,c_login customer_login 154 | ,c_email_address customer_email_address 155 | ,d_year dyear 156 | ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total 157 | ,'w' sale_type 158 | from customer_all 159 | ,web_sales_dist_sort 160 | ,date_dim_all 161 | where c_customer_sk = ws_bill_customer_sk 162 | and ws_sold_date_sk = d_date_sk 163 | group by c_customer_id 164 | ,c_first_name 165 | ,c_last_name 166 | ,c_preferred_cust_flag 167 | ,c_birth_country 168 | ,c_login 169 | ,c_email_address 170 | ,d_year 171 | ) 172 | select 173 | t_s_secyear.customer_id 174 | ,t_s_secyear.customer_first_name 175 | ,t_s_secyear.customer_last_name 176 | ,t_s_secyear.customer_birth_country 177 | from year_total t_s_firstyear 178 | ,year_total t_s_secyear 179 | ,year_total t_w_firstyear 180 | ,year_total t_w_secyear 181 | where t_s_secyear.customer_id = t_s_firstyear.customer_id 182 | and t_s_firstyear.customer_id = t_w_secyear.customer_id 183 | and t_s_firstyear.customer_id = t_w_firstyear.customer_id 184 | and t_s_firstyear.sale_type = 's' 185 | and t_w_firstyear.sale_type = 'w' 186 | and t_s_secyear.sale_type = 's' 187 | and t_w_secyear.sale_type = 'w' 188 | and t_s_firstyear.dyear = 1998 189 | and t_s_secyear.dyear = 1998+1 190 | and t_w_firstyear.dyear = 1998 191 | and t_w_secyear.dyear = 1998+1 192 | and t_s_firstyear.year_total > 0 193 | and t_w_firstyear.year_total > 0 194 | and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end 195 | > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end 196 | order by t_s_secyear.customer_id 197 | ,t_s_secyear.customer_first_name 198 | ,t_s_secyear.customer_last_name 199 | ,t_s_secyear.customer_birth_country 200 | ; 201 | 202 | -- query 57 203 | --Find the item brands and categories for each call center and their monthly sales figures for a specified year, 204 | -- where the monthly sales figure deviated more than 10% of the average monthly sales for the year, sorted by 205 | -- deviation and call center. Report the sales deviation from the previous and following month. 206 | 207 | -- query 57 (edited) 208 | -- query referencing item_all, date_dim_all , call_center_all 209 | select i_category, i_brand, cc_name, d_year, d_moy, 210 | sum(cs_sales_price) sum_sales, 211 | avg(sum(cs_sales_price)) over 212 | (partition by i_category, i_brand, 213 | cc_name, d_year) 214 | avg_monthly_sales, 215 | rank() over 216 | (partition by i_category, i_brand, 217 | cc_name 218 | order by d_year, d_moy) rn 219 | from item_all, catalog_sales, date_dim_all , call_center_all 220 | where cs_item_sk = i_item_sk and 221 | cs_sold_date_sk = d_date_sk and 222 | cc_call_center_sk= cs_call_center_sk 223 | group by i_category, i_brand, 224 | cc_name , d_year, d_moy; 225 | 226 | 227 | -- query 57 (edited) 228 | -- query referencing catalog_sales_dist, item_all, date_dim_all , call_center_all 229 | select i_category, i_brand, cc_name, d_year, d_moy, 230 | sum(cs_sales_price) sum_sales, 231 | avg(sum(cs_sales_price)) over 232 | (partition by i_category, i_brand, 233 | cc_name, d_year) 234 | avg_monthly_sales, 235 | rank() over 236 | (partition by i_category, i_brand, 237 | cc_name 238 | order by d_year, d_moy) rn 239 | from item_all, catalog_sales_dist, date_dim_all , call_center_all 240 | where cs_item_sk = i_item_sk and 241 | cs_sold_date_sk = d_date_sk and 242 | cc_call_center_sk= cs_call_center_sk 243 | group by i_category, i_brand, 244 | cc_name , d_year, d_moy; 245 | --------------------------------------------------------------------------------