├── .gitignore ├── Chapter_02 ├── Chapter_02.sql ├── Orders_2023-07-07.csv ├── Orders_2023-07-08.csv └── Orders_2023-07-09.csv ├── Chapter_03 ├── 202308 │ ├── Orders_2023-08-07.csv │ └── Orders_2023-08-08.csv ├── Chapter_03.sql └── Orders_2023-08-04.csv ├── Chapter_04 ├── Chapter_04_Part1.sql ├── Chapter_04_Part2.sql ├── Chapter_04_Part3.sql └── Orders_2023-08-11.json ├── Chapter_05 ├── Chapter_05.sql ├── Orders_2023-09-04_12-30-00_12345.json ├── Orders_2023-09-04_12-30-00_12346.json ├── Orders_2023-09-04_12-45-00_12347.json ├── Orders_2023-09-04_12-45-00_12348.json ├── Orders_2023-09-04_13-00-00_12349.json ├── Orders_2023-09-04_14-00-00_12350.json ├── Orders_2023-09-04_14-30-00_12351.json └── Orders_2023-09-04_14-30-00_12352.json ├── Chapter_06 ├── Chapter_06_Part1.sql ├── Chapter_06_Part2_Snowpark_is_holiday.py ├── Chapter_06_Part3_Snowpark_proc_is_holiday.sql ├── Chapter_06_Part4_Snowpark_connection.py ├── Chapter_06_Part5_Snowpark_connection_params.py ├── Chapter_06_Part6_Snowpark_dim_date.py ├── Chapter_06_Part7_Snowpark_ingest_CSV.py ├── Chapter_06_Part8_Snowpark_data_frames.py ├── Chapter_06_Part9.sql ├── Orders_2023-07-07.csv ├── Orders_2023-07-08.csv ├── Orders_2023-07-09.csv └── connection_parameters.json ├── Chapter_07 ├── Chapter_07_Part1_customer_reviews.sql ├── Chapter_07_Part2_review_sentiment.sql └── Chapter_07_Part3_read_emails.sql ├── Chapter_08 └── Chapter_08.sql ├── Chapter_09 └── Chapter_09.sql ├── Chapter_10 ├── Chapter_10_Part1_role_based_access_control.sql ├── Chapter_10_Part2_row_access_policy.sql └── Chapter_10_Part3_masking_policy.sql ├── Chapter_11 ├── Chapter_11_Part1_create_schema.sql ├── Chapter_11_Part2_EXT_layer.sql ├── Chapter_11_Part3_STG_layer_from_json_files.sql ├── Chapter_11_Part4_STG_layer_from_database.sql ├── Chapter_11_Part5_DWH_layer.sql ├── Chapter_11_Part6_MGMT_layer.sql ├── Orders_2023-09-01.json └── Orders_2023-09-04.json ├── Chapter_12 ├── Chapter_12_Part1_stream_orders.sql ├── Chapter_12_Part2_stream_PRODUCT.sql ├── Chapter_12_Part3_stream_PARTNER.sql ├── Chapter_12_Part4_dynamic_table.sql ├── Orders_2023-09-05.json └── Orders_2023-09-06.json ├── Chapter_13 ├── Chapter_13_Part1_create_schema.sql ├── Chapter_13_Part2_create_tasks_orders.sql ├── Chapter_13_Part3_send_email.sql ├── Chapter_13_Part4_create_task_graph.sql ├── Chapter_13_Part5_monitoring.sql ├── Orders_2023-09-07.json └── Orders_2023-09-08.json ├── Chapter_14 ├── Chapter_14_Part1_data_quality_task_PARTNER.sql ├── Chapter_14_Part2_data_quality_task_PRODUCT.sql ├── Chapter_14_Part3_data_metric_functions.sql ├── Chapter_14_Part4_alert.sql └── Chapter_14_Part5_anomaly_detection.sql ├── Chapter_15 ├── Chapter_15_Part1_setup.sql ├── Chapter_15_Part2_RBAC.sql ├── Chapter_15_Part3_execute_RBAC.sql └── Snowflake_objects │ ├── deploy_objects.sql │ ├── resume_tasks.sql │ ├── schemas │ ├── dwh │ │ ├── dynamic_tables │ │ │ └── create_ORDERS_TBL.sql │ │ ├── tables │ │ │ ├── create_PARTNER_TBL.sql │ │ │ └── create_PRODUCT_TBL.sql │ │ └── views │ │ │ └── create_PRODUCT_VALID_TS.sql │ ├── ext │ │ ├── stages │ │ │ └── create_JSON_ORDERS_STAGE.sql │ │ ├── streams │ │ │ └── create_JSON_ORDERS_STREAM.sql │ │ └── tables │ │ │ └── create_JSON_ORDERS_EXT.sql │ ├── mgmt │ │ └── dynamic_tables │ │ │ └── create_ORDERS_SUMMARY_TBL.sql │ ├── orchestration │ │ └── tasks │ │ │ ├── create_COPY_ORDERS_TASK.sql │ │ │ ├── create_INSERT_ORDERS_STG_TASK.sql │ │ │ ├── create_INSERT_PARTNER_TASK.sql │ │ │ ├── create_INSERT_PRODUCT_TASK.sql │ │ │ ├── create_PIPELINE_END_TASK.sql │ │ │ └── create_PIPELINE_START_TASK.sql 
│ └── stg │ │ ├── streams │ │ ├── create_PARTNER_STREAM.sql │ │ └── create_PRODUCT_STREAM.sql │ │ └── tables │ │ ├── create_JSON_ORDERS_TBL_STG.sql │ │ ├── create_PARTNER.sql │ │ └── create_PRODUCT.sql │ └── suspend_tasks.sql └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | config.txt 2 | .venv 3 | Random -------------------------------------------------------------------------------- /Chapter_02/Chapter_02.sql: -------------------------------------------------------------------------------- 1 | -- initial setup: create database, schema and virtual warehouse 2 | use role SYSADMIN; 3 | create database BAKERY_DB; 4 | create schema ORDERS; 5 | create warehouse BAKERY_WH with warehouse_size = 'XSMALL'; 6 | 7 | -- create named internal stage 8 | use database BAKERY_DB; 9 | use schema ORDERS; 10 | create stage ORDERS_STAGE; 11 | -- view the contents of the stage (will be empty upon creation) 12 | list @ORDERS_STAGE; 13 | 14 | -- manually upload file Orders_2023-07-07.csv into the ORDERS_STAGE internal stage using the Snowsight user interface 15 | 16 | -- view the contents of the stage again (should show the file that was just uploaded) 17 | list @ORDERS_STAGE; 18 | 19 | -- then view data in the internal stage 20 | select $1, $2, $3, $4, $5 from @ORDERS_STAGE; 21 | 22 | -- create staging table 23 | use database BAKERY_DB; 24 | use schema ORDERS; 25 | create table ORDERS_STG ( 26 | customer varchar, 27 | order_date date, 28 | delivery_date date, 29 | baked_good_type varchar, 30 | quantity number, 31 | source_file_name varchar, 32 | load_ts timestamp 33 | ); 34 | 35 | -- copy data from the internal stage to the staging table using parameters: 36 | -- - file_format to specify that the header line is to be skipped 37 | -- - on_error to specify that the statement is to be aborted if an error is encountered 38 | -- - purge the csv file from the internal stage after loading data 39 | -- Listing 2.1 40 | use database BAKERY_DB; 41 | use schema ORDERS; 42 | copy into ORDERS_STG 43 | from ( 44 | select $1, $2, $3, $4, $5, metadata$filename, current_timestamp() 45 | from @ORDERS_STAGE 46 | ) 47 | file_format = (type = csv, skip_header = 1) 48 | on_error = abort_statement 49 | purge = true; 50 | 51 | -- view the data that was loaded 52 | select * from ORDERS_STG; 53 | 54 | -- view the contents of the stage again (should be empty again because the file was purged after loading) 55 | list @ORDERS_STAGE; 56 | 57 | -- create the target table 58 | use database BAKERY_DB; 59 | use schema ORDERS; 60 | create or replace table CUSTOMER_ORDERS ( 61 | customer varchar, 62 | order_date date, 63 | delivery_date date, 64 | baked_good_type varchar, 65 | quantity number, 66 | source_file_name varchar, 67 | load_ts timestamp 68 | ); 69 | 70 | 71 | -- merge data from the staging table into the target table 72 | -- Listing 2.2 73 | -- the target table 74 | merge into CUSTOMER_ORDERS tgt 75 | -- the source table 76 | using ORDERS_STG as src 77 | -- the columns that ensure uniqueness 78 | on src.customer = tgt.customer 79 | and src.delivery_date = tgt.delivery_date 80 | and src.baked_good_type = tgt.baked_good_type 81 | -- update the target table with the values from the source table 82 | when matched then 83 | update set tgt.quantity = src.quantity, 84 | tgt.source_file_name = src.source_file_name, 85 | tgt.load_ts = current_timestamp() 86 | -- insert new values from the source table into the target table 87 | when not matched then 88 | insert (customer, order_date, 
delivery_date, baked_good_type, 89 | quantity, source_file_name, load_ts) 90 | values(src.customer, src.order_date, src.delivery_date, 91 | src.baked_good_type, src.quantity, src.source_file_name, 92 | current_timestamp()); 93 | 94 | 95 | -- view data after merging 96 | select * from CUSTOMER_ORDERS order by delivery_date desc; 97 | 98 | -- create summary table 99 | use database BAKERY_DB; 100 | use schema ORDERS; 101 | create table SUMMARY_ORDERS( 102 | delivery_date date, 103 | baked_good_type varchar, 104 | total_quantity number 105 | ); 106 | 107 | -- construct a SQL query that summarizes the customer order data by delivery date, and baked good type 108 | select delivery_date, baked_good_type, sum(quantity) as total_quantity 109 | from CUSTOMER_ORDERS 110 | group by all; 111 | 112 | -- truncate summary table 113 | truncate table SUMMARY_ORDERS; 114 | 115 | -- insert summarized data into the summary table 116 | -- Listing 2.3 117 | insert into SUMMARY_ORDERS(delivery_date, baked_good_type, total_quantity) 118 | select delivery_date, baked_good_type, sum(quantity) as total_quantity 119 | from CUSTOMER_ORDERS 120 | group by all; 121 | 122 | -- view data in the summary table 123 | select * from SUMMARY_ORDERS; 124 | 125 | -- create task that executes the previous steps on schedule: 126 | -- - truncates the staging table 127 | -- - loads data from the internal stage into the staging table using the COPY command 128 | -- - merges data from the staging table into the target table 129 | -- - truncates the summary table 130 | -- - inserts summarized data into the summary table 131 | -- - executes every 10 minutes (for testing) - later will be rescheduled to run once every evening 132 | use database BAKERY_DB; 133 | use schema ORDERS; 134 | create or replace task PROCESS_ORDERS 135 | warehouse = BAKERY_WH 136 | schedule = '10 M' 137 | as 138 | begin 139 | truncate table ORDERS_STG; 140 | copy into ORDERS_STG 141 | from ( 142 | select $1, $2, $3, $4, $5, metadata$filename, current_timestamp() 143 | from @ORDERS_STAGE 144 | ) 145 | file_format = (type = csv, skip_header = 1) 146 | on_error = abort_statement 147 | purge = true; 148 | 149 | merge into CUSTOMER_ORDERS tgt 150 | using ORDERS_STG as src 151 | on src.customer = tgt.customer and src.delivery_date = tgt.delivery_date and src.baked_good_type = tgt.baked_good_type 152 | when matched then 153 | update set tgt.quantity = src.quantity, tgt.source_file_name = src.source_file_name, tgt.load_ts = current_timestamp() 154 | when not matched then 155 | insert (customer, order_date, delivery_date, baked_good_type, quantity, source_file_name, load_ts) 156 | values(src.customer, src.order_date, src.delivery_date, src.baked_good_type, src.quantity, src.source_file_name, current_timestamp()); 157 | 158 | truncate table SUMMARY_ORDERS; 159 | insert into SUMMARY_ORDERS(delivery_date, baked_good_type, total_quantity) 160 | select delivery_date, baked_good_type, sum(quantity) as total_quantity 161 | from CUSTOMER_ORDERS 162 | group by all; 163 | end; 164 | 165 | -- grant EXECUTE TASK privilege to the user who will be executing the task 166 | use role accountadmin; 167 | grant execute task on account to role sysadmin; 168 | use role sysadmin; 169 | 170 | -- manually execute task to test 171 | execute task PROCESS_ORDERS; 172 | 173 | -- view all previous and scheduled task executions 174 | -- Listing 2.4 175 | select * 176 | from table(information_schema.task_history()) 177 | order by scheduled_time desc; 178 | 179 | -- when the task is created it is initially 
suspended, must be manually resumed 180 | alter task PROCESS_ORDERS resume; 181 | 182 | -- change the task schedule to run at 11PM using UTC timezone 183 | -- must suspend task first and resume after changing the schedule 184 | alter task PROCESS_ORDERS suspend; 185 | 186 | alter task PROCESS_ORDERS 187 | set schedule = 'USING CRON 0 23 * * * UTC'; 188 | 189 | alter task PROCESS_ORDERS resume; 190 | 191 | -- when done, suspend the task so that it doesn't continue to execute and consume credits 192 | alter task PROCESS_ORDERS suspend; 193 | -------------------------------------------------------------------------------- /Chapter_02/Orders_2023-07-07.csv: -------------------------------------------------------------------------------- 1 | Customer,Order date,Delivery date,Baked good type,Quantity 2 | Coffee Pocket,2023-07-07,2023-07-10,Baguette,6 3 | Coffee Pocket,2023-07-07,2023-07-10,Bagel,12 4 | Coffee Pocket,2023-07-07,2023-07-10,English Muffin,16 5 | Coffee Pocket,2023-07-07,2023-07-10,Croissant,18 6 | Lily's Coffee,2023-07-07,2023-07-10,Bagel,20 7 | Lily's Coffee,2023-07-07,2023-07-10,White Loaf,4 8 | Lily's Coffee,2023-07-07,2023-07-10,Croissant,20 9 | Crave Coffee,2023-07-07,2023-07-10,Croissant,50 10 | Best Burgers,2023-07-07,2023-07-10,Hamburger Bun,75 11 | Page One Fast Food,2023-07-07,2023-07-10,Bagel,20 12 | Page One Fast Food,2023-07-07,2023-07-10,English Muffin,36 13 | Page One Fast Food,2023-07-07,2023-07-10,White Loaf,5 14 | Page One Fast Food,2023-07-07,2023-07-10,Hamburger Bun,50 15 | Jimmy's Diner,2023-07-07,2023-07-10,Bagel,18 16 | Jimmy's Diner,2023-07-07,2023-07-10,Rye Loaf,2 17 | Jimmy's Diner,2023-07-07,2023-07-10,White Loaf,7 18 | Jimmy's Diner,2023-07-07,2023-07-10,Whole Wheat Loaf,3 19 | Jimmy's Diner,2023-07-07,2023-07-10,Hamburger Bun,12 20 | Metro Fine Foods,2023-07-07,2023-07-10,Baguette,24 21 | Metro Fine Foods,2023-07-07,2023-07-10,Rye Loaf,8 22 | Metro Fine Foods,2023-07-07,2023-07-10,White Loaf,28 23 | Metro Fine Foods,2023-07-07,2023-07-10,Whole Wheat Loaf,22 24 | Coffee Pocket,2023-07-07,2023-07-11,Baguette,5 25 | Coffee Pocket,2023-07-07,2023-07-11,Bagel,9 26 | Coffee Pocket,2023-07-07,2023-07-11,English Muffin,12 27 | Coffee Pocket,2023-07-07,2023-07-11,Croissant,14 28 | Lily's Coffee,2023-07-07,2023-07-11,Bagel,15 29 | Lily's Coffee,2023-07-07,2023-07-11,White Loaf,3 30 | Lily's Coffee,2023-07-07,2023-07-11,Croissant,15 31 | Crave Coffee,2023-07-07,2023-07-11,Croissant,38 32 | Best Burgers,2023-07-07,2023-07-11,Hamburger Bun,56 33 | Page One Fast Food,2023-07-07,2023-07-11,Bagel,15 34 | Page One Fast Food,2023-07-07,2023-07-11,English Muffin,27 35 | Page One Fast Food,2023-07-07,2023-07-11,White Loaf,4 36 | Page One Fast Food,2023-07-07,2023-07-11,Hamburger Bun,38 37 | Jimmy's Diner,2023-07-07,2023-07-11,Bagel,14 38 | Jimmy's Diner,2023-07-07,2023-07-11,Rye Loaf,2 39 | Jimmy's Diner,2023-07-07,2023-07-11,White Loaf,5 40 | Jimmy's Diner,2023-07-07,2023-07-11,Whole Wheat Loaf,2 41 | Jimmy's Diner,2023-07-07,2023-07-11,Hamburger Bun,9 42 | Metro Fine Foods,2023-07-07,2023-07-11,Baguette,18 43 | Metro Fine Foods,2023-07-07,2023-07-11,Rye Loaf,6 44 | Metro Fine Foods,2023-07-07,2023-07-11,White Loaf,21 45 | Metro Fine Foods,2023-07-07,2023-07-11,Whole Wheat Loaf,17 46 | Coffee Pocket,2023-07-07,2023-07-12,Baguette,4 47 | Coffee Pocket,2023-07-07,2023-07-12,Bagel,5 48 | Coffee Pocket,2023-07-07,2023-07-12,English Muffin,6 49 | Coffee Pocket,2023-07-07,2023-07-12,Croissant,13 50 | Lily's Coffee,2023-07-07,2023-07-12,Bagel,4 51 | Lily's 
Coffee,2023-07-07,2023-07-12,White Loaf,3 52 | Crave Coffee,2023-07-07,2023-07-12,Croissant,11 53 | Best Burgers,2023-07-07,2023-07-12,Hamburger Bun,56 54 | Page One Fast Food,2023-07-07,2023-07-12,Bagel,5 55 | Page One Fast Food,2023-07-07,2023-07-12,English Muffin,27 56 | Page One Fast Food,2023-07-07,2023-07-12,White Loaf,2 57 | Page One Fast Food,2023-07-07,2023-07-12,Hamburger Bun,36 58 | Jimmy's Diner,2023-07-07,2023-07-12,Bagel,4 59 | Jimmy's Diner,2023-07-07,2023-07-12,Rye Loaf,2 60 | Jimmy's Diner,2023-07-07,2023-07-12,White Loaf,2 61 | Jimmy's Diner,2023-07-07,2023-07-12,Whole Wheat Loaf,1 62 | Metro Fine Foods,2023-07-07,2023-07-12,Baguette,1 63 | Metro Fine Foods,2023-07-07,2023-07-12,Rye Loaf,6 64 | Metro Fine Foods,2023-07-07,2023-07-12,White Loaf,5 65 | Metro Fine Foods,2023-07-07,2023-07-12,Whole Wheat Loaf,7 66 | -------------------------------------------------------------------------------- /Chapter_02/Orders_2023-07-08.csv: -------------------------------------------------------------------------------- 1 | Customer,Order date,Delivery date,Baked good type,Quantity 2 | Coffee Pocket,2023-07-08,2023-07-11,Baguette,5 3 | Coffee Pocket,2023-07-08,2023-07-11,Bagel,9 4 | Coffee Pocket,2023-07-08,2023-07-11,English Muffin,12 5 | Coffee Pocket,2023-07-08,2023-07-11,Croissant,14 6 | Lily's Coffee,2023-07-08,2023-07-11,Bagel,15 7 | Lily's Coffee,2023-07-08,2023-07-11,White Loaf,3 8 | Lily's Coffee,2023-07-08,2023-07-11,Croissant,15 9 | Crave Coffee,2023-07-08,2023-07-11,Croissant,38 10 | Best Burgers,2023-07-08,2023-07-11,Hamburger Bun,56 11 | Page One Fast Food,2023-07-08,2023-07-11,Bagel,15 12 | Page One Fast Food,2023-07-08,2023-07-11,English Muffin,27 13 | Page One Fast Food,2023-07-08,2023-07-11,White Loaf,4 14 | Page One Fast Food,2023-07-08,2023-07-11,Hamburger Bun,38 15 | Jimmy's Diner,2023-07-08,2023-07-11,Bagel,14 16 | Jimmy's Diner,2023-07-08,2023-07-11,Rye Loaf,2 17 | Jimmy's Diner,2023-07-08,2023-07-11,White Loaf,5 18 | Jimmy's Diner,2023-07-08,2023-07-11,Whole Wheat Loaf,2 19 | Jimmy's Diner,2023-07-08,2023-07-11,Hamburger Bun,9 20 | Metro Fine Foods,2023-07-08,2023-07-11,Baguette,18 21 | Metro Fine Foods,2023-07-08,2023-07-11,Rye Loaf,6 22 | Metro Fine Foods,2023-07-08,2023-07-11,White Loaf,21 23 | Metro Fine Foods,2023-07-08,2023-07-11,Whole Wheat Loaf,17 24 | Coffee Pocket,2023-07-08,2023-07-12,Baguette,4 25 | Coffee Pocket,2023-07-08,2023-07-12,Bagel,5 26 | Coffee Pocket,2023-07-08,2023-07-12,English Muffin,6 27 | Coffee Pocket,2023-07-08,2023-07-12,Croissant,13 28 | Lily's Coffee,2023-07-08,2023-07-12,Bagel,4 29 | Lily's Coffee,2023-07-08,2023-07-12,White Loaf,3 30 | Crave Coffee,2023-07-08,2023-07-12,Croissant,11 31 | Best Burgers,2023-07-08,2023-07-12,Hamburger Bun,56 32 | Page One Fast Food,2023-07-08,2023-07-12,Bagel,5 33 | Page One Fast Food,2023-07-08,2023-07-12,English Muffin,27 34 | Page One Fast Food,2023-07-08,2023-07-12,White Loaf,2 35 | Page One Fast Food,2023-07-08,2023-07-12,Hamburger Bun,36 36 | Jimmy's Diner,2023-07-08,2023-07-12,Bagel,4 37 | Jimmy's Diner,2023-07-08,2023-07-12,Rye Loaf,2 38 | Jimmy's Diner,2023-07-08,2023-07-12,White Loaf,2 39 | Jimmy's Diner,2023-07-08,2023-07-12,Whole Wheat Loaf,1 40 | Metro Fine Foods,2023-07-08,2023-07-12,Baguette,1 41 | Metro Fine Foods,2023-07-08,2023-07-12,Rye Loaf,6 42 | Metro Fine Foods,2023-07-08,2023-07-12,White Loaf,5 43 | Metro Fine Foods,2023-07-08,2023-07-12,Whole Wheat Loaf,7 44 | Coffee Pocket,2023-07-08,2023-07-13,Baguette,5 45 | Coffee Pocket,2023-07-08,2023-07-13,Bagel,5 46 | Coffee 
Pocket,2023-07-08,2023-07-13,English Muffin,12 47 | Coffee Pocket,2023-07-08,2023-07-13,Croissant,13 48 | Lily's Coffee,2023-07-08,2023-07-13,Bagel,15 49 | Lily's Coffee,2023-07-08,2023-07-13,White Loaf,2 50 | Lily's Coffee,2023-07-08,2023-07-13,Croissant,5 51 | Crave Coffee,2023-07-08,2023-07-13,Croissant,40 52 | Best Burgers,2023-07-08,2023-07-13,Hamburger Bun,62 53 | Page One Fast Food,2023-07-08,2023-07-13,Bagel,16 54 | Page One Fast Food,2023-07-08,2023-07-13,English Muffin,27 55 | Page One Fast Food,2023-07-08,2023-07-13,White Loaf,1 56 | Page One Fast Food,2023-07-08,2023-07-13,Hamburger Bun,49 57 | Jimmy's Diner,2023-07-08,2023-07-13,Bagel,16 58 | Jimmy's Diner,2023-07-08,2023-07-13,White Loaf,3 59 | Jimmy's Diner,2023-07-08,2023-07-13,Whole Wheat Loaf,2 60 | Jimmy's Diner,2023-07-08,2023-07-13,Hamburger Bun,10 61 | Metro Fine Foods,2023-07-08,2023-07-13,Baguette,20 62 | Metro Fine Foods,2023-07-08,2023-07-13,Rye Loaf,4 63 | Metro Fine Foods,2023-07-08,2023-07-13,White Loaf,20 64 | Metro Fine Foods,2023-07-08,2023-07-13,Whole Wheat Loaf,19 65 | -------------------------------------------------------------------------------- /Chapter_02/Orders_2023-07-09.csv: -------------------------------------------------------------------------------- 1 | Customer,Order date,Delivery date,Baked good type,Quantity 2 | Coffee Pocket,2023-07-09,2023-07-13,Baguette,5 3 | Coffee Pocket,2023-07-09,2023-07-13,Bagel,10 4 | Coffee Pocket,2023-07-09,2023-07-13,English Muffin,12 5 | Coffee Pocket,2023-07-09,2023-07-13,Croissant,13 6 | Lily's Coffee,2023-07-09,2023-07-13,Bagel,15 7 | Lily's Coffee,2023-07-09,2023-07-13,White Loaf,4 8 | Lily's Coffee,2023-07-09,2023-07-13,Croissant,15 9 | Crave Coffee,2023-07-09,2023-07-13,Croissant,40 10 | Best Burgers,2023-07-09,2023-07-13,Hamburger Bun,62 11 | Page One Fast Food,2023-07-09,2023-07-13,Bagel,16 12 | Page One Fast Food,2023-07-09,2023-07-13,English Muffin,27 13 | Page One Fast Food,2023-07-09,2023-07-13,White Loaf,2 14 | Page One Fast Food,2023-07-09,2023-07-13,Hamburger Bun,49 15 | Jimmy's Diner,2023-07-09,2023-07-13,Bagel,16 16 | Jimmy's Diner,2023-07-09,2023-07-13,White Loaf,3 17 | Jimmy's Diner,2023-07-09,2023-07-13,Whole Wheat Loaf,2 18 | Jimmy's Diner,2023-07-09,2023-07-13,Hamburger Bun,10 19 | Metro Fine Foods,2023-07-09,2023-07-13,Baguette,20 20 | Metro Fine Foods,2023-07-09,2023-07-13,Rye Loaf,4 21 | Metro Fine Foods,2023-07-09,2023-07-13,White Loaf,20 22 | Metro Fine Foods,2023-07-09,2023-07-13,Whole Wheat Loaf,19 23 | Coffee Pocket,2023-07-09,2023-07-14,Baguette,6 24 | Coffee Pocket,2023-07-09,2023-07-14,Bagel,12 25 | Coffee Pocket,2023-07-09,2023-07-14,English Muffin,10 26 | Coffee Pocket,2023-07-09,2023-07-14,Croissant,12 27 | Lily's Coffee,2023-07-09,2023-07-14,Bagel,10 28 | Lily's Coffee,2023-07-09,2023-07-14,White Loaf,2 29 | Lily's Coffee,2023-07-09,2023-07-14,Croissant,16 30 | Crave Coffee,2023-07-09,2023-07-14,Croissant,32 31 | Best Burgers,2023-07-09,2023-07-14,Hamburger Bun,40 32 | Page One Fast Food,2023-07-09,2023-07-14,Bagel,15 33 | Page One Fast Food,2023-07-09,2023-07-14,English Muffin,20 34 | Page One Fast Food,2023-07-09,2023-07-14,White Loaf,2 35 | Page One Fast Food,2023-07-09,2023-07-14,Hamburger Bun,50 36 | Jimmy's Diner,2023-07-09,2023-07-14,Bagel,16 37 | Jimmy's Diner,2023-07-09,2023-07-14,White Loaf,3 38 | Jimmy's Diner,2023-07-09,2023-07-14,Whole Wheat Loaf,4 39 | Jimmy's Diner,2023-07-09,2023-07-14,Hamburger Bun,10 40 | Metro Fine Foods,2023-07-09,2023-07-14,Baguette,20 41 | Metro Fine Foods,2023-07-09,2023-07-14,Rye 
Loaf,4 42 | Metro Fine Foods,2023-07-09,2023-07-14,White Loaf,12 43 | Metro Fine Foods,2023-07-09,2023-07-14,Whole Wheat Loaf,16 44 | -------------------------------------------------------------------------------- /Chapter_03/202308/Orders_2023-08-07.csv: -------------------------------------------------------------------------------- 1 | Customer,Order date,Delivery date,Baked good type,Quantity 2 | New Bistro,2023-08-07,2023-08-08,Baguette,12 3 | New Bistro,2023-08-07,2023-08-08,Whole Wheat Loaf,12 4 | New Bistro,2023-08-07,2023-08-08,White Loaf,10 5 | New Bistro,2023-08-07,2023-08-08,Croissant,20 6 | New Bistro,2023-08-07,2023-08-09,Baguette,9 7 | New Bistro,2023-08-07,2023-08-09,Whole Wheat Loaf,8 8 | New Bistro,2023-08-07,2023-08-09,White Loaf,10 9 | New Bistro,2023-08-07,2023-08-09,Croissant,24 -------------------------------------------------------------------------------- /Chapter_03/202308/Orders_2023-08-08.csv: -------------------------------------------------------------------------------- 1 | Customer,Order date,Delivery date,Baked good type,Quantity 2 | New Bistro,2023-08-08,2023-08-09,Baguette,9 3 | New Bistro,2023-08-08,2023-08-09,Whole Wheat Loaf,8 4 | New Bistro,2023-08-08,2023-08-09,White Loaf,10 5 | New Bistro,2023-08-08,2023-08-09,Croissant,24 6 | New Bistro,2023-08-08,2023-08-10,Baguette,15 7 | New Bistro,2023-08-08,2023-08-10,Whole Wheat Loaf,10 8 | New Bistro,2023-08-08,2023-08-10,White Loaf,11 9 | New Bistro,2023-08-08,2023-08-10,Croissant,36 -------------------------------------------------------------------------------- /Chapter_03/Chapter_03.sql: -------------------------------------------------------------------------------- 1 | -- create a storage integration 2 | -- using Microsoft Azure 3 | -- refer to Chapter 4 for Amazon S3 4 | use role ACCOUNTADMIN; 5 | 6 | create storage integration BISTRO_INTEGRATION 7 | type = external_stage 8 | storage_provider = 'AZURE' 9 | enabled = true 10 | azure_tenant_id = '1234abcd-xxx-56efgh78' --use your own Tenant ID 11 | storage_allowed_locations = ('azure://bakeryorders001.blob.core.windows.net/orderfiles/'); 12 | 13 | -- describe the storage integration and take note of the following parameters: 14 | -- - AZURE_CONSENT_URL 15 | -- - AZURE_MULTI_TENANT_APP_NAME 16 | describe storage integration BISTRO_INTEGRATION; 17 | 18 | -- grant usage on storage integration so that the SYSADMIN role can use it 19 | grant usage on integration BISTRO_INTEGRATION to role SYSADMIN; 20 | 21 | -- create a new schema in the BAKERY_DB database (see Chapter 2) 22 | use role SYSADMIN; 23 | create warehouse if not exists BAKERY_WH with warehouse_size = 'XSMALL'; 24 | create database if not exists BAKERY_DB; 25 | use database BAKERY_DB; 26 | create schema EXTERNAL_ORDERS; 27 | use schema EXTERNAL_ORDERS; 28 | 29 | -- create an external stage using the storage integration 30 | -- Listing 3.1 31 | create stage BISTRO_STAGE 32 | storage_integration = BISTRO_INTEGRATION 33 | url = 'azure://bakeryorders001.blob.core.windows.net/orderfiles'; 34 | 35 | -- Upload a sample file named Orders_2023-08-04.csv to the storage container 36 | 37 | -- view files in the external stage 38 | list @BISTRO_STAGE; 39 | 40 | -- create an external stage using a SAS token 41 | create stage BISTRO_SAS_STAGE 42 | URL = 'azure://bakeryorders001.blob.core.windows.net/orderfiles' 43 | CREDENTIALS=(AZURE_SAS_TOKEN = '?sv=2023-...%3D'); --generate and use your own SAS token 44 | 45 | -- view files in the external stage 46 | list @BISTRO_SAS_STAGE; 47 | 48 | -- create a named file 
format 49 | create file format ORDERS_CSV_FORMAT 50 | type = csv 51 | field_delimiter = ',' 52 | skip_header = 1; 53 | 54 | -- create the external stage by adding the file format 55 | create or replace stage BISTRO_STAGE 56 | storage_integration = BISTRO_INTEGRATION 57 | url = 'azure://bakeryorders001.blob.core.windows.net/orderfiles' 58 | file_format = ORDERS_CSV_FORMAT; 59 | 60 | -- create staging table for restaurant orders 61 | use database BAKERY_DB; 62 | use schema EXTERNAL_ORDERS; 63 | create table ORDERS_BISTRO_STG ( 64 | customer varchar, 65 | order_date date, 66 | delivery_date date, 67 | baked_good_type varchar, 68 | quantity number, 69 | source_file_name varchar, 70 | load_ts timestamp 71 | ); 72 | 73 | -- load data from the stage into the staging table 74 | -- Listing 3.3 75 | copy into ORDERS_BISTRO_STG 76 | from ( 77 | select $1, $2, $3, $4, $5, metadata$filename, current_timestamp() 78 | from @BISTRO_STAGE 79 | ) 80 | file_format = ORDERS_CSV_FORMAT 81 | on_error = abort_statement 82 | ; 83 | 84 | -- view data in the staging table 85 | select * from ORDERS_BISTRO_STG; 86 | 87 | -- view load history for the table 88 | select * 89 | from information_schema.load_history 90 | where schema_name = 'EXTERNAL_ORDERS' and table_name = 'ORDERS_BISTRO_STG' 91 | order by last_load_time desc; 92 | 93 | -- add a directory table to the stage 94 | alter stage BISTRO_STAGE 95 | set directory = (enable = true); 96 | 97 | -- manually refresh the directory 98 | alter stage BISTRO_STAGE refresh; 99 | 100 | -- query the directory table 101 | select * 102 | from directory (@BISTRO_STAGE); 103 | 104 | -- Upload additional CSV files to the container in the 202308 path 105 | 106 | -- load data from the stage into the staging table by specifying a path 107 | copy into ORDERS_BISTRO_STG 108 | from ( 109 | select $1, $2, $3, $4, $5, metadata$filename, current_timestamp() 110 | from @BISTRO_STAGE/202308 111 | ) 112 | file_format = ORDERS_CSV_FORMAT 113 | on_error = abort_statement 114 | ; 115 | 116 | -- create an external table 117 | -- Listing 3.4 118 | use database BAKERY_DB; 119 | use schema EXTERNAL_ORDERS; 120 | create external table ORDERS_BISTRO_EXT ( 121 | customer varchar as (VALUE:c1::varchar), 122 | order_date date as (VALUE:c2::date), 123 | delivery_date date as (VALUE:c3::date), 124 | baked_good_type varchar as (VALUE:c4::varchar), 125 | quantity number as (VALUE:c5::number), 126 | source_file_name varchar as metadata$filename 127 | ) 128 | location = @BISTRO_STAGE 129 | auto_refresh = FALSE 130 | file_format = ORDERS_CSV_FORMAT; 131 | 132 | -- query the external table 133 | select * 134 | from ORDERS_BISTRO_EXT; 135 | 136 | -- refresh the external table 137 | alter external table ORDERS_BISTRO_EXT refresh; 138 | 139 | -- create a materialized view 140 | use database BAKERY_DB; 141 | use schema EXTERNAL_ORDERS; 142 | create materialized view ORDERS_BISTRO_MV as 143 | select customer, order_date, delivery_date, 144 | baked_good_type, quantity, source_file_name 145 | from ORDERS_BISTRO_EXT; 146 | 147 | -- query the materialized view 148 | select * 149 | from ORDERS_BISTRO_MV; 150 | -------------------------------------------------------------------------------- /Chapter_03/Orders_2023-08-04.csv: -------------------------------------------------------------------------------- 1 | Customer,Order date,Delivery date,Baked good type,Quantity 2 | New Bistro,2023-08-04,2023-08-07,Baguette,15 3 | New Bistro,2023-08-04,2023-08-07,Whole Wheat Loaf,12 4 | New Bistro,2023-08-04,2023-08-07,White Loaf,10 5 
| New Bistro,2023-08-04,2023-08-07,Croissant,30 6 | New Bistro,2023-08-04,2023-08-08,Baguette,12 7 | New Bistro,2023-08-04,2023-08-08,Whole Wheat Loaf,12 8 | New Bistro,2023-08-04,2023-08-08,White Loaf,10 9 | New Bistro,2023-08-04,2023-08-08,Croissant,20 -------------------------------------------------------------------------------- /Chapter_04/Chapter_04_Part1.sql: -------------------------------------------------------------------------------- 1 | -- create a storage integration 2 | -- using Amazon S3 3 | -- refer to Chapter 3 for Microsoft Azure 4 | use role ACCOUNTADMIN; 5 | 6 | create storage integration PARK_INN_INTEGRATION 7 | type = external_stage 8 | storage_provider = 'S3' 9 | enabled = true 10 | storage_aws_role_arn = 'arn:aws:iam::567890987654:role/Snowflake-demo' 11 | storage_allowed_locations = ('s3://parkinnorders001/'); 12 | 13 | -- describe the storage integration and take note of the following parameters: 14 | -- - STORAGE_AWS_IAM_USER_ARN 15 | -- - STORAGE_AWS_EXTERNAL_ID 16 | describe storage integration PARK_INN_INTEGRATION; 17 | 18 | -- grant usage on storage integration so that the SYSADMIN role can use it 19 | grant usage on integration PARK_INN_INTEGRATION to role SYSADMIN; 20 | 21 | -- create a new schema in the BAKERY_DB database (see Chapter 2) 22 | use role SYSADMIN; 23 | create warehouse if not exists BAKERY_WH with warehouse_size = 'XSMALL'; 24 | use warehouse BAKERY_WH; 25 | create database if not exists BAKERY_DB; 26 | use database BAKERY_DB; 27 | create schema EXTERNAL_JSON_ORDERS; 28 | use schema EXTERNAL_JSON_ORDERS; 29 | 30 | -- create an external stage using the storage integration 31 | create stage PARK_INN_STAGE 32 | storage_integration = PARK_INN_INTEGRATION 33 | url = 's3://parkinnorders001/' 34 | file_format = (type = json); 35 | 36 | -- view files in the external stage 37 | list @PARK_INN_STAGE; 38 | 39 | -- view data in the staged file 40 | select $1 from @PARK_INN_STAGE; 41 | 42 | -- create staging table for restaurant orders in raw (json) format 43 | use database BAKERY_DB; 44 | use schema EXTERNAL_JSON_ORDERS; 45 | create table ORDERS_PARK_INN_RAW_STG ( 46 | customer_orders variant, 47 | source_file_name varchar, 48 | load_ts timestamp 49 | ); 50 | 51 | -- load data from the stage into the staging table 52 | copy into ORDERS_PARK_INN_RAW_STG 53 | from ( 54 | select 55 | $1, 56 | metadata$filename, 57 | current_timestamp() 58 | from @PARK_INN_STAGE 59 | ) 60 | on_error = abort_statement 61 | ; 62 | 63 | -- view data in the staging table 64 | select * 65 | from ORDERS_PARK_INN_RAW_STG; 66 | 67 | -- select the values from the first level keys 68 | -- Listing 4.2 69 | select 70 | customer_orders:"Customer"::varchar as customer, 71 | customer_orders:"Order date"::date as order_date, 72 | customer_orders:"Orders" 73 | from ORDERS_PARK_INN_RAW_STG; 74 | 75 | -- select the values from the second level keys using LATERAL FLATTEN 76 | -- Listing 4.3 77 | select 78 | customer_orders:"Customer"::varchar as customer, 79 | customer_orders:"Order date"::date as order_date, 80 | value:"Delivery date"::date as delivery_date, 81 | value:"Orders by day" 82 | from ORDERS_PARK_INN_RAW_STG, 83 | lateral flatten (input => customer_orders:"Orders"); 84 | 85 | -- select the values from the third level keys using another LATERAL FLATTEN 86 | -- Listing 4.4 87 | select 88 | customer_orders:"Customer"::varchar as customer, 89 | customer_orders:"Order date"::date as order_date, 90 | CO.value:"Delivery date"::date as delivery_date, 91 | DO.value:"Baked good type":: 
varchar as baked_good_type, 92 | DO.value:"Quantity"::number as quantity 93 | from ORDERS_PARK_INN_RAW_STG, 94 | lateral flatten (input => customer_orders:"Orders") CO, 95 | lateral flatten (input => CO.value:"Orders by day") DO; 96 | 97 | -- create a view to represent a relational staging table using the previous query 98 | 99 | use database BAKERY_DB; 100 | use schema EXTERNAL_JSON_ORDERS; 101 | create view ORDERS_PARK_INN_STG as 102 | select 103 | customer_orders:"Customer"::varchar as customer, 104 | customer_orders:"Order date"::date as order_date, 105 | CO.value:"Delivery date"::date as delivery_date, 106 | DO.value:"Baked good type":: varchar as baked_good_type, 107 | DO.value:"Quantity"::number as quantity, 108 | source_file_name, 109 | load_ts 110 | from ORDERS_PARK_INN_RAW_STG, 111 | lateral flatten (input => customer_orders:"Orders") CO, 112 | lateral flatten (input => CO.value:"Orders by day") DO; 113 | 114 | -- view data in the view 115 | select * 116 | from ORDERS_PARK_INN_STG; -------------------------------------------------------------------------------- /Chapter_04/Chapter_04_Part2.sql: -------------------------------------------------------------------------------- 1 | use role SYSADMIN; 2 | create warehouse if not exists BAKERY_WH with warehouse_size = 'XSMALL'; 3 | use warehouse BAKERY_WH; 4 | create database if not exists BAKERY_DB; 5 | use database BAKERY_DB; 6 | create schema TRANSFORM; 7 | use schema TRANSFORM; 8 | 9 | -- create a view that combines data from individual staging tables 10 | create view ORDERS_COMBINED_STG as 11 | select customer, order_date, delivery_date, baked_good_type, quantity, source_file_name, load_ts 12 | from bakery_db.orders.ORDERS_STG 13 | union all 14 | select customer, order_date, delivery_date, baked_good_type, quantity, source_file_name, load_ts 15 | from bakery_db.external_orders.ORDERS_BISTRO_STG 16 | union all 17 | select customer, order_date, delivery_date, baked_good_type, quantity, source_file_name, load_ts 18 | from bakery_db.external_json_orders.ORDERS_PARK_INN_STG; 19 | 20 | -- create target table that will store historical orders combined from all sources 21 | use database BAKERY_DB; 22 | use schema TRANSFORM; 23 | use schema TRANSFORM; 24 | create or replace table CUSTOMER_ORDERS_COMBINED ( 25 | customer varchar, 26 | order_date date, 27 | delivery_date date, 28 | baked_good_type varchar, 29 | quantity number, 30 | source_file_name varchar, 31 | load_ts timestamp 32 | ); 33 | 34 | -- merge combined staging data into the target table 35 | -- Listing 4.5 36 | merge into CUSTOMER_ORDERS_COMBINED tgt 37 | using ORDERS_COMBINED_STG as src 38 | on src.customer = tgt.customer and src.delivery_date = tgt.delivery_date and src.baked_good_type = tgt.baked_good_type 39 | when matched then 40 | update set tgt.quantity = src.quantity, tgt.source_file_name = src.source_file_name, tgt.load_ts = current_timestamp() 41 | when not matched then 42 | insert (customer, order_date, delivery_date, baked_good_type, quantity, source_file_name, load_ts) 43 | values(src.customer, src.order_date, src.delivery_date, src.baked_good_type, src.quantity, src.source_file_name, current_timestamp()) 44 | ; 45 | 46 | -- create a stored procedure that executes the previous MERGE statement 47 | -- Listing 4.6 48 | use database BAKERY_DB; 49 | use schema TRANSFORM; 50 | create or replace procedure LOAD_CUSTOMER_ORDERS() 51 | returns varchar 52 | language sql 53 | as 54 | $$ 55 | begin 56 | merge into CUSTOMER_ORDERS_COMBINED tgt 57 | using ORDERS_COMBINED_STG as 
src 58 | on src.customer = tgt.customer and src.delivery_date = tgt.delivery_date and src.baked_good_type = tgt.baked_good_type 59 | when matched then 60 | update set tgt.quantity = src.quantity, 61 | tgt.source_file_name = src.source_file_name, 62 | tgt.load_ts = current_timestamp() 63 | when not matched then 64 | insert (customer, order_date, delivery_date, 65 | baked_good_type, quantity, source_file_name, load_ts) 66 | values(src.customer, src.order_date, src.delivery_date, 67 | src.baked_good_type, src.quantity, src.source_file_name, 68 | current_timestamp()); 69 | end; 70 | $$ 71 | ; 72 | 73 | -- execute the stored procedure 74 | call LOAD_CUSTOMER_ORDERS(); 75 | 76 | -- modify the stored procedure: add return string 77 | use database BAKERY_DB; 78 | use schema TRANSFORM; 79 | -- Listing 4.8 80 | create or replace procedure LOAD_CUSTOMER_ORDERS() 81 | returns varchar 82 | language sql 83 | as 84 | $$ 85 | begin 86 | merge into CUSTOMER_ORDERS_COMBINED tgt 87 | using ORDERS_COMBINED_STG as src 88 | on src.customer = tgt.customer and src.delivery_date = tgt.delivery_date and src.baked_good_type = tgt.baked_good_type 89 | when matched then 90 | update set tgt.quantity = src.quantity, 91 | tgt.source_file_name = src.source_file_name, 92 | tgt.load_ts = current_timestamp() 93 | when not matched then 94 | insert (customer, order_date, delivery_date, 95 | baked_good_type, quantity, source_file_name, load_ts) 96 | values(src.customer, src.order_date, src.delivery_date, 97 | src.baked_good_type, src.quantity, src.source_file_name, 98 | current_timestamp()); 99 | return 'Load completed. ' || SQLROWCOUNT || ' rows affected.'; 100 | end; 101 | $$ 102 | ; 103 | 104 | -- execute the stored procedure 105 | call LOAD_CUSTOMER_ORDERS(); 106 | 107 | -- modify the stored procedure: add exception handling 108 | -- Listing 4.9 109 | use database BAKERY_DB; 110 | use schema TRANSFORM; 111 | create or replace procedure LOAD_CUSTOMER_ORDERS() 112 | returns varchar 113 | language sql 114 | as 115 | $$ 116 | begin 117 | merge into CUSTOMER_ORDERS_COMBINED tgt 118 | using ORDERS_COMBINED_STG as src 119 | on src.customer = tgt.customer and src.delivery_date = tgt.delivery_date and src.baked_good_type = tgt.baked_good_type 120 | when matched then 121 | update set tgt.quantity = src.quantity, 122 | tgt.source_file_name = src.source_file_name, 123 | tgt.load_ts = current_timestamp() 124 | when not matched then 125 | insert (customer, order_date, delivery_date, 126 | baked_good_type, quantity, source_file_name, load_ts) 127 | values(src.customer, src.order_date, src.delivery_date, 128 | src.baked_good_type, src.quantity, src.source_file_name, 129 | current_timestamp()); 130 | return 'Load completed. 
' || SQLROWCOUNT || ' rows affected.'; 131 | exception 132 | when other then 133 | return 'Load failed with error message: ' || SQLERRM; 134 | end; 135 | $$ 136 | ; 137 | 138 | -- execute the stored procedure 139 | call LOAD_CUSTOMER_ORDERS(); 140 | -------------------------------------------------------------------------------- /Chapter_04/Chapter_04_Part3.sql: -------------------------------------------------------------------------------- 1 | use warehouse BAKERY_WH; 2 | use database BAKERY_DB; 3 | use schema TRANSFORM; 4 | 5 | use role SYSADMIN; 6 | -- create an event table 7 | create event table BAKERY_EVENTS; 8 | 9 | use role ACCOUNTADMIN; 10 | -- associate the event table with the account 11 | alter account set event_table = BAKERY_DB.TRANSFORM.BAKERY_EVENTS; 12 | -- grant privileges to set log level to the SYSADMIN role 13 | grant modify log level on account to role SYSADMIN; 14 | 15 | use role SYSADMIN; 16 | -- set the log level on the stored procedure to DEBUG 17 | alter procedure LOAD_CUSTOMER_ORDERS() set log_level = DEBUG; 18 | 19 | -- modify the stored procedure: add logging 20 | use database BAKERY_DB; 21 | use schema TRANSFORM; 22 | create or replace procedure LOAD_CUSTOMER_ORDERS() 23 | returns varchar 24 | language sql 25 | as 26 | $$ 27 | begin 28 | SYSTEM$LOG_DEBUG('LOAD_CUSTOMER_ORDERS begin '); 29 | merge into CUSTOMER_ORDERS_COMBINED tgt 30 | using ORDERS_COMBINED_STG as src 31 | on src.customer = tgt.customer and src.delivery_date = tgt.delivery_date and src.baked_good_type = tgt.baked_good_type 32 | when matched then 33 | update set tgt.quantity = src.quantity, 34 | tgt.source_file_name = src.source_file_name, 35 | tgt.load_ts = current_timestamp() 36 | when not matched then 37 | insert (customer, order_date, delivery_date, 38 | baked_good_type, quantity, source_file_name, load_ts) 39 | values(src.customer, src.order_date, src.delivery_date, 40 | src.baked_good_type, src.quantity, src.source_file_name, 41 | current_timestamp()); 42 | return 'Load completed. ' || SQLROWCOUNT || ' rows affected.'; 43 | exception 44 | when other then 45 | return 'Load failed with error message: ' || SQLERRM; 46 | end; 47 | $$ 48 | ; 49 | 50 | -- execute the stored procedure 51 | call LOAD_CUSTOMER_ORDERS(); 52 | 53 | -- after waiting a few minutes, select data from the event table 54 | select * 55 | from bakery_events 56 | order by timestamp desc; 57 | 58 | 59 | -- create summarized table 60 | use database BAKERY_DB; 61 | use schema TRANSFORM; 62 | create table SUMMARY_ORDERS ( 63 | delivery_date date, 64 | baked_good_type varchar, 65 | total_quantity number 66 | ); 67 | 68 | -- insert data into the summarized table (truncate the table first to avoid data duplication) 69 | -- Listing 4.10 70 | truncate table SUMMARY_ORDERS; 71 | insert into SUMMARY_ORDERS(delivery_date, baked_good_type, total_quantity) 72 | select delivery_date, baked_good_type, sum(quantity) as total_quantity 73 | from CUSTOMER_ORDERS_COMBINED 74 | group by all; 75 | 76 | -- create a stored procedure that encapsulates the TRUNCATE and INSERT statements 77 | create or replace procedure LOAD_CUSTOMER_SUMMARY_ORDERS() 78 | returns varchar 79 | language sql 80 | as 81 | $$ 82 | begin 83 | SYSTEM$LOG_DEBUG('LOAD_CUSTOMER_SUMMARY_ORDERS begin '); 84 | 85 | truncate table SUMMARY_ORDERS; 86 | insert into SUMMARY_ORDERS(delivery_date, baked_good_type, total_quantity) 87 | select delivery_date, baked_good_type, sum(quantity) as total_quantity 88 | from CUSTOMER_ORDERS_COMBINED 89 | group by all; 90 | return 'Load completed. 
' || SQLROWCOUNT || ' rows inserted.'; 91 | exception 92 | when other then 93 | return 'Load failed with error message: ' || SQLERRM; 94 | end; 95 | $$ 96 | ; 97 | 98 | call LOAD_CUSTOMER_SUMMARY_ORDERS(); 99 | 100 | -- query the summarized table 101 | select * 102 | from SUMMARY_ORDERS 103 | order by delivery_date desc; 104 | -------------------------------------------------------------------------------- /Chapter_04/Orders_2023-08-11.json: -------------------------------------------------------------------------------- 1 | { 2 | "Customer": "Park Inn", 3 | "Order date": "2023-08-11", 4 | "Orders": [ 5 | { 6 | "Delivery date": "2023-08-14", 7 | "Orders by day": [ 8 | { 9 | "Baked good type": "English Muffin", 10 | "Quantity": 30 11 | }, 12 | { 13 | "Baked good type": "Whole Wheat Loaf", 14 | "Quantity": 6 15 | }, 16 | { 17 | "Baked good type": "White Loaf", 18 | "Quantity": 4 19 | }, 20 | { 21 | "Baked good type": "Bagel", 22 | "Quantity": 25 23 | }, 24 | { 25 | "Baked good type": "Croissant", 26 | "Quantity": 36 27 | } 28 | ] 29 | }, 30 | { 31 | "Delivery date": "2023-08-15", 32 | "Orders by day": [ 33 | { 34 | "Baked good type": "English Muffin", 35 | "Quantity": 26 36 | }, 37 | { 38 | "Baked good type": "Whole Wheat Loaf", 39 | "Quantity": 4 40 | }, 41 | { 42 | "Baked good type": "Bagel", 43 | "Quantity": 22 44 | }, 45 | { 46 | "Baked good type": "Croissant", 47 | "Quantity": 30 48 | } 49 | ] 50 | } 51 | ] 52 | } -------------------------------------------------------------------------------- /Chapter_05/Chapter_05.sql: -------------------------------------------------------------------------------- 1 | -- create a storage integration 2 | use role ACCOUNTADMIN; 3 | 4 | create storage integration SPEEDY_INTEGRATION 5 | type = external_stage 6 | storage_provider = 'AZURE' 7 | enabled = true 8 | azure_tenant_id = '1234abcd-xxx-56efgh78' --use your own Tenant ID 9 | storage_allowed_locations = ('azure://speedyorders001.blob.core.windows.net/speedyservicefiles/');; 10 | 11 | -- describe the storage integration and take note of the following parameters: 12 | -- - AZURE_CONSENT_URL 13 | -- - AZURE_MULTI_TENANT_APP_NAME 14 | describe integration SPEEDY_INTEGRATION; 15 | 16 | -- grant usage on storage integration so that the SYSADMIN role can use it 17 | grant usage on integration SPEEDY_INTEGRATION to role SYSADMIN; 18 | 19 | -- create a new schema in the BAKERY_DB database (see Chapter 2) 20 | use role SYSADMIN; 21 | create warehouse if not exists BAKERY_WH with warehouse_size = 'XSMALL'; 22 | create database if not exists BAKERY_DB; 23 | use database BAKERY_DB; 24 | create schema DELIVERY_ORDERS; 25 | use schema DELIVERY_ORDERS; 26 | 27 | -- create an external stage using the storage integration 28 | create stage SPEEDY_STAGE 29 | storage_integration = SPEEDY_INTEGRATION 30 | url = 'azure://speedyorders001.blob.core.windows.net/speedyservicefiles/' 31 | file_format = (type = json); 32 | 33 | -- view files in the external stage 34 | list @SPEEDY_STAGE; 35 | 36 | -- view data in the staged files 37 | select $1 from @SPEEDY_STAGE; 38 | 39 | -- extract the ORDER_ID and ORDER_DATETIME columns from the JSON, but leave ITEMS as variant without parsing 40 | select 41 | $1:"Order id", 42 | $1:"Order datetime", 43 | $1:"Items", 44 | metadata$filename, 45 | current_timestamp() 46 | from @SPEEDY_STAGE; 47 | 48 | -- create staging table for delivery orders 49 | create table SPEEDY_ORDERS_RAW_STG ( 50 | order_id varchar, 51 | order_datetime timestamp, 52 | items variant, 53 | source_file_name varchar, 54 
| load_ts timestamp 55 | ); 56 | 57 | -- configure event grid messages for blob storage events 58 | -- - enable the event grid resource provider 59 | -- - create a storage queue and take note of the queue URL 60 | -- - create an event grid subscription with an event grid system topic for the "Blob Created" event 61 | 62 | -- create a notification integration 63 | use role ACCOUNTADMIN; 64 | CREATE NOTIFICATION INTEGRATION SPEEDY_QUEUE_INTEGRATION 65 | ENABLED = true 66 | TYPE = QUEUE 67 | NOTIFICATION_PROVIDER = AZURE_STORAGE_QUEUE 68 | AZURE_STORAGE_QUEUE_PRIMARY_URI = 'https://speedyorders001.queue.core.windows.net/speedyordersqueue' 69 | AZURE_TENANT_ID = '1234abcd-xxx-56efgh78'; 70 | 71 | -- describe the storage integration and take note of the following parameters: 72 | -- - AZURE_CONSENT_URL 73 | -- - AZURE_MULTI_TENANT_APP_NAME 74 | describe notification integration SPEEDY_QUEUE_INTEGRATION; 75 | 76 | -- grant usage on notification integration so that the SYSADMIN role can use it 77 | grant usage on integration SPEEDY_QUEUE_INTEGRATION to role SYSADMIN; 78 | 79 | -- create the snowpipe 80 | use role SYSADMIN; 81 | use database BAKERY_DB; 82 | use schema DELIVERY_ORDERS; 83 | 84 | create pipe SPEEDY_PIPE 85 | auto_ingest = true 86 | integration = 'SPEEDY_QUEUE_INTEGRATION' 87 | as 88 | copy into SPEEDY_ORDERS_RAW_STG 89 | from ( 90 | select 91 | $1:"Order id", 92 | $1:"Order datetime", 93 | $1:"Items", 94 | metadata$filename, 95 | current_timestamp() 96 | from @SPEEDY_STAGE 97 | ); 98 | 99 | -- load historical data from files that existed in the external stage before Event Grid messages were configured 100 | alter pipe SPEEDY_PIPE refresh; 101 | 102 | -- view data in the staging table 103 | select * 104 | from SPEEDY_ORDERS_RAW_STG; 105 | 106 | -- check the status of the pipe 107 | select system$pipe_status('SPEEDY_PIPE'); 108 | 109 | -- view the copy history in the last hour 110 | select * 111 | from table(information_schema.copy_history( 112 | table_name => 'SPEEDY_ORDERS_RAW_STG', 113 | start_time => dateadd(hours, -1, current_timestamp()))); 114 | 115 | -- select the values from the second level keys 116 | select 117 | order_id, 118 | order_datetime, 119 | value:"Item"::varchar as baked_good_type, 120 | value:"Quantity"::number as quantity 121 | from SPEEDY_ORDERS_RAW_STG, 122 | lateral flatten (input => items); 123 | 124 | -- create a dynamic table that materializes the output of the previous query 125 | create dynamic table SPEEDY_ORDERS 126 | target_lag = '1 minute' 127 | warehouse = BAKERY_WH 128 | as 129 | select 130 | order_id, 131 | order_datetime, 132 | value:"Item"::varchar as baked_good_type, 133 | value:"Quantity"::number as quantity, 134 | source_file_name, 135 | load_ts 136 | from SPEEDY_ORDERS_RAW_STG, 137 | lateral flatten (input => items); 138 | 139 | -- query the data in the dynamic table 140 | select * 141 | from SPEEDY_ORDERS 142 | order by order_datetime desc; 143 | 144 | -- query the dynamic table refresh history 145 | select * 146 | from table(information_schema.dynamic_table_refresh_history()) 147 | order by refresh_start_time desc; -------------------------------------------------------------------------------- /Chapter_05/Orders_2023-09-04_12-30-00_12345.json: -------------------------------------------------------------------------------- 1 | { 2 | "Order id": "12345", 3 | "Order datetime": "2023-09-04 12:30:00", 4 | "Items": [ 5 | { 6 | "Item": "Croissant", 7 | "Quantity": 2 8 | }, 9 | { 10 | "Item": "Bagel", 11 | "Quantity": 3 12 | } 13 | ] 14 | } 
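A minimal sanity-check sketch of the LATERAL FLATTEN step in Chapter_05.sql, assuming the sample file above (Orders_2023-09-04_12-30-00_12345.json) has already been ingested by SPEEDY_PIPE into SPEEDY_ORDERS_RAW_STG; the filter on order_id is added here only to narrow the output to that one order:

select
  order_id,
  order_datetime,
  value:"Item"::varchar as baked_good_type,
  value:"Quantity"::number as quantity
from SPEEDY_ORDERS_RAW_STG,
lateral flatten (input => items)
where order_id = '12345';

-- expected result: one row per element of the Items array
-- ORDER_ID | ORDER_DATETIME      | BAKED_GOOD_TYPE | QUANTITY
-- 12345    | 2023-09-04 12:30:00 | Croissant       | 2
-- 12345    | 2023-09-04 12:30:00 | Bagel           | 3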
-------------------------------------------------------------------------------- /Chapter_05/Orders_2023-09-04_12-30-00_12346.json: -------------------------------------------------------------------------------- 1 | { 2 | "Order id": "12346", 3 | "Order datetime": "2023-09-04 12:30:00", 4 | "Items": [ 5 | { 6 | "Item": "Croissant", 7 | "Quantity": 5 8 | } 9 | ] 10 | } -------------------------------------------------------------------------------- /Chapter_05/Orders_2023-09-04_12-45-00_12347.json: -------------------------------------------------------------------------------- 1 | { 2 | "Order id": "12347", 3 | "Order datetime": "2023-09-04 12:45:00", 4 | "Items": [ 5 | { 6 | "Item": "Muffin", 7 | "Quantity": 12 8 | } 9 | ] 10 | } -------------------------------------------------------------------------------- /Chapter_05/Orders_2023-09-04_12-45-00_12348.json: -------------------------------------------------------------------------------- 1 | { 2 | "Order id": "12348", 3 | "Order datetime": "2023-09-04 12:45:00", 4 | "Items": [ 5 | { 6 | "Item": "Muffin", 7 | "Quantity": 2 8 | }, 9 | { 10 | "Item": "Croissant", 11 | "Quantity": 3 12 | } 13 | ] 14 | } -------------------------------------------------------------------------------- /Chapter_05/Orders_2023-09-04_13-00-00_12349.json: -------------------------------------------------------------------------------- 1 | { 2 | "Order id": "12349", 3 | "Order datetime": "2023-09-04 13:00:00", 4 | "Items": [ 5 | { 6 | "Item": "Cinnamon Bun", 7 | "Quantity": 6 8 | }, 9 | { 10 | "Item": "Croissant", 11 | "Quantity": 3 12 | } 13 | ] 14 | } -------------------------------------------------------------------------------- /Chapter_05/Orders_2023-09-04_14-00-00_12350.json: -------------------------------------------------------------------------------- 1 | { 2 | "Order id": "12350", 3 | "Order datetime": "2023-09-04 14:00:00", 4 | "Items": [ 5 | { 6 | "Item": "Blueberry Muffin", 7 | "Quantity": 6 8 | }, 9 | { 10 | "Item": "Cinnamon Bun", 11 | "Quantity": 3 12 | } 13 | ] 14 | } -------------------------------------------------------------------------------- /Chapter_05/Orders_2023-09-04_14-30-00_12351.json: -------------------------------------------------------------------------------- 1 | { 2 | "Order id": "12351", 3 | "Order datetime": "2023-09-04 14:30:00", 4 | "Items": [ 5 | { 6 | "Item": "Bagel", 7 | "Quantity": 4 8 | }, 9 | { 10 | "Item": "English Muffin", 11 | "Quantity": 2 12 | }, 13 | { 14 | "Item": "Croissant", 15 | "Quantity": 4 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /Chapter_05/Orders_2023-09-04_14-30-00_12352.json: -------------------------------------------------------------------------------- 1 | { 2 | "Order id": "12352", 3 | "Order datetime": "2023-09-04 14:30:00", 4 | "Items": [ 5 | { 6 | "Item": "Chocolate Muffin", 7 | "Quantity": 12 8 | } 9 | ] 10 | } -------------------------------------------------------------------------------- /Chapter_06/Chapter_06_Part1.sql: -------------------------------------------------------------------------------- 1 | -- create a new schema in the BAKERY_DB database (see Chapter 2) 2 | use role SYSADMIN; 3 | create warehouse if not exists BAKERY_WH with warehouse_size = 'XSMALL'; 4 | create database if not exists BAKERY_DB; 5 | use database BAKERY_DB; 6 | create schema SNOWPARK; 7 | use schema SNOWPARK; 8 | 9 | -- Section 6.5 Ingesting Data from a CSV File into a Snowflake Table 10 | create stage ORDERS_STAGE; 11 | 
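Note on the Snowpark scripts that follow: Parts 5 through 8 read their connection credentials from connection_parameters.json, which is listed under Chapter_06 in the directory tree but not reproduced in this excerpt. Based on the keys those scripts read and the placeholder values hard-coded in Part 4, the file is assumed to look roughly like this (substitute your own account, username, and password):

{
  "account": "pqrstuv-ab12345",
  "user": "my_user",
  "password": "my_pass",
  "role": "SYSADMIN",
  "warehouse": "BAKERY_WH",
  "database": "BAKERY_DB",
  "schema": "SNOWPARK"
}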
-------------------------------------------------------------------------------- /Chapter_06/Chapter_06_Part2_Snowpark_is_holiday.py: -------------------------------------------------------------------------------- 1 | #Listing 6.1 2 | # the Snowpark package is required for Python Worksheets 3 | import snowflake.snowpark as snowpark 4 | # importing the holidays package 5 | import holidays 6 | 7 | def is_holiday(session: snowpark.Session): 8 | # get a list of all holidays in the US 9 | all_holidays = holidays.country_holidays('US') 10 | # return TRUE if January 1, 2024 is a holiday, otherwise return false 11 | if '2024-01-01' in all_holidays: 12 | return True 13 | else: 14 | return False -------------------------------------------------------------------------------- /Chapter_06/Chapter_06_Part3_Snowpark_proc_is_holiday.sql: -------------------------------------------------------------------------------- 1 | use role SYSADMIN; 2 | use database BAKERY_DB; 3 | use schema SNOWPARK; 4 | 5 | -- create the stored procedure with parameters 6 | create or replace procedure PROC_IS_HOLIDAY(p_date date, p_country string) 7 | returns String 8 | language python 9 | runtime_version = 3.8 10 | packages =('holidays==0.29', 'snowflake-snowpark-python==*') 11 | handler = 'is_holiday' 12 | comment = 'The procedure returns True if the date is a holiday in the country' 13 | as ' 14 | # The Snowpark package is required for Python Worksheets 15 | import snowflake.snowpark as snowpark 16 | # Adding the holidays package 17 | import holidays 18 | 19 | #Listing 6.2 20 | def is_holiday(session: snowpark.Session, p_date, p_country): 21 | # get a list of all holidays in p_country 22 | all_holidays = holidays.country_holidays(p_country) 23 | # return TRUE if p_date is a holiday, otherwise return false 24 | if p_date in all_holidays: 25 | return True 26 | else: 27 | return False 28 | '; 29 | 30 | -- execute the procedure using different values for the p_date and p_country parameters 31 | call PROC_IS_HOLIDAY('2024-01-01', 'US'); -- returns True 32 | call PROC_IS_HOLIDAY('2024-07-04', 'US'); -- returns True 33 | call PROC_IS_HOLIDAY('2024-07-14', 'US'); -- returns False 34 | call PROC_IS_HOLIDAY('2024-07-14', 'FR'); -- returns True 35 | call PROC_IS_HOLIDAY('2024-07-04', 'FR'); -- returns False 36 | -------------------------------------------------------------------------------- /Chapter_06/Chapter_06_Part4_Snowpark_connection.py: -------------------------------------------------------------------------------- 1 | # pre-requisites for executing the python code: 2 | # - install a supported Python version 3 | # - create and activate a Python virtual environment 4 | # - install the snowflake-snowpark-python package (using pip or conda, depending on your Python environment) 5 | 6 | #Listing 6.3 7 | # import Session from the snowflake.snowpark package 8 | from snowflake.snowpark import Session 9 | 10 | # create a dictionary with the connection parameters 11 | connection_parameters_dict = { 12 | "account": "pqrstuv-ab12345", # replace with your Snowflake account 13 | "user": "my_user", # replace with your username 14 | "password": "my_pass", # replace with your password 15 | "role": "SYSADMIN", 16 | "warehouse": "BAKERY_WH", 17 | "database": "BAKERY_DB", 18 | "schema": "SNOWPARK" 19 | } 20 | 21 | # create the session 22 | my_session = Session.builder.configs(connection_parameters_dict).create() 23 | 24 | # close the session 25 | my_session.close() --------------------------------------------------------------------------------
/Chapter_06/Chapter_06_Part5_Snowpark_connection_params.py: -------------------------------------------------------------------------------- 1 | #Listing 6.4 2 | # import Session from the snowflake.snowpark package 3 | from snowflake.snowpark import Session 4 | 5 | import json 6 | 7 | # read the credentials from a file 8 | credentials = json.load(open('connection_parameters.json')) 9 | 10 | # create a dictionary with the connection parameters 11 | connection_parameters_dict = { 12 | "account": credentials["account"], 13 | "user": credentials["user"], 14 | "password": credentials["password"], 15 | "role": credentials["role"], 16 | "warehouse": credentials["warehouse"], 17 | "database": credentials["database"], 18 | "schema": credentials["schema"] # optional 19 | } 20 | 21 | # create the session 22 | my_session = Session.builder.configs(connection_parameters_dict).create() 23 | 24 | # select the current timestamp 25 | ts = my_session.sql("select current_timestamp()").collect() 26 | # print the output to the console 27 | print(ts) 28 | 29 | # close the session 30 | my_session.close() 31 | -------------------------------------------------------------------------------- /Chapter_06/Chapter_06_Part6_Snowpark_dim_date.py: -------------------------------------------------------------------------------- 1 | #Listing 6.5 2 | # import Session from the snowflake.snowpark package 3 | from snowflake.snowpark import Session 4 | # import data types from the snowflake.snowpark package 5 | from snowflake.snowpark.types import StructType, StructField, DateType, BooleanType 6 | # import json package for reading connection parameters 7 | import json 8 | # import date and timedelta from the datetime package for generating dates 9 | from datetime import date, timedelta 10 | # install the holidays package using pip or conda 11 | # import the holidays package to determine whether a given date is a holiday 12 | import holidays 13 | 14 | #Listing 6.6 15 | # define a function that returns True if p_date is a holiday in p_country 16 | def is_holiday(p_date, p_country): 17 | # get a list of all holidays in p_country 18 | all_holidays = holidays.country_holidays(p_country) 19 | # return True if p_date is a holiday, otherwise return false 20 | if p_date in all_holidays: 21 | return True 22 | else: 23 | return False 24 | 25 | #Listing 6.7 26 | # generate a list of dates starting from start_date followed by as many days as defined in the no_days variable 27 | # define the start date 28 | start_dt = date(2023, 1, 1) 29 | # define number of days 30 | # use the value 5 to generate a sample dimension with 5 days 31 | no_days = 5 32 | # change the value to 731 to generate dates for 731 days (years 2023 and 2024) 33 | #no_days = 731 34 | # store consecutive dates starting from the start date in a list 35 | dates = [(start_dt + timedelta(days=i)).isoformat() for i in range(no_days)] 36 | 37 | #Listing 6.8 38 | # create a list of lists that combines the list of dates with the output of the is_holiday() function 39 | holiday_flags = [[d, is_holiday(d, 'US')] for d in dates] 40 | 41 | # print the holiday_flags list of lists locally to check that the data is as expected 42 | print(holiday_flags) 43 | 44 | # establish a connection with Snowflake because we need the Snowpark API 45 | 46 | #Refer to Listing 6.4 47 | # read the credentials from a file 48 | credentials = json.load(open('connection_parameters.json')) 49 | # create a dictionary with the connection parameters 50 | connection_parameters_dict = { 51 | "account": 
credentials["account"], 52 | "user": credentials["user"], 53 | "password": credentials["password"], 54 | "role": credentials["role"], 55 | "warehouse": credentials["warehouse"], 56 | "database": credentials["database"], 57 | "schema": credentials["schema"] # optional 58 | } 59 | 60 | # create a session object for the Snowpark session 61 | my_session = Session.builder.configs(connection_parameters_dict).create() 62 | 63 | #Listing 6.9 64 | # create a data frame from the holiday_flags list of lists and define the schema as two columns: 65 | # - column named "day" with data type DateType 66 | # - column named "holiday_flg" with data type BooleanType 67 | df = my_session.create_dataframe( 68 | holiday_flags, 69 | schema = StructType( 70 | [StructField("day", DateType()), 71 | StructField("holiday_flg", BooleanType())]) 72 | ) 73 | 74 | # print the data frame to verify that it contains the correct data 75 | print(df.collect()) 76 | 77 | #Listing 6.10 78 | # save the data frame to a Snowflake table named DIM_DATE and overwrite the table if it already exists 79 | df.write.mode("overwrite").save_as_table("DIM_DATE") 80 | 81 | # close the Snowpark session 82 | my_session.close() -------------------------------------------------------------------------------- /Chapter_06/Chapter_06_Part7_Snowpark_ingest_CSV.py: -------------------------------------------------------------------------------- 1 | #Listing 6.11 2 | # import Session from the snowflake.snowpark package 3 | from snowflake.snowpark import Session 4 | # import data types from the snowflake.snowpark package 5 | from snowflake.snowpark.types import StructType, StructField, DateType, StringType, DecimalType 6 | # import json package for reading connection parameters 7 | import json 8 | 9 | # assign the source file name to a variable 10 | source_file_name = 'Orders_2023-07-09.csv' 11 | 12 | # establish a connection with Snowflake 13 | 14 | #Refer to Listing 6.4 15 | # read the credentials from a file 16 | credentials = json.load(open('connection_parameters.json')) 17 | # create a dictionary with the connection parameters 18 | connection_parameters_dict = { 19 | "account": credentials["account"], 20 | "user": credentials["user"], 21 | "password": credentials["password"], 22 | "role": credentials["role"], 23 | "warehouse": credentials["warehouse"], 24 | "database": credentials["database"], 25 | "schema": credentials["schema"] # optional 26 | } 27 | 28 | # create a session object for the Snowpark session 29 | my_session = Session.builder.configs(connection_parameters_dict).create() 30 | 31 | #Listing 6.12 32 | # put the file into the stage 33 | result = my_session.file.put(source_file_name, "@orders_stage") 34 | print(result) 35 | 36 | #Listing 6.13 37 | # define the schema for the csv file 38 | schema_for_csv = StructType( 39 | [StructField("Customer", StringType()), 40 | StructField("Order_date", DateType()), 41 | StructField("Delivery_date", DateType()), 42 | StructField("Baked_good_type", StringType()), 43 | StructField("Quantity", DecimalType()) 44 | ]) 45 | 46 | #Listing 6.14 47 | # COPY data from the CSV file to the staging table using the session.read method 48 | df = my_session.read.schema(schema_for_csv).csv("@orders_stage") 49 | result = df.copy_into_table("ORDERS_STG", format_type_options = {"skip_header": 1}) 50 | 51 | # close the Snowpark session 52 | my_session.close() 53 | -------------------------------------------------------------------------------- /Chapter_06/Chapter_06_Part8_Snowpark_data_frames.py: 
-------------------------------------------------------------------------------- 1 | # import Session from the snowflake.snowpark package 2 | from snowflake.snowpark import Session 3 | # import json package for reading connection parameters 4 | import json 5 | 6 | # establish a connection with Snowflake 7 | 8 | # read the credentials from a file 9 | credentials = json.load(open('connection_parameters.json')) 10 | # create a dictionary with the connection parameters 11 | connection_parameters_dict = { 12 | "account": credentials["account"], 13 | "user": credentials["user"], 14 | "password": credentials["password"], 15 | "role": credentials["role"], 16 | "warehouse": credentials["warehouse"], 17 | "database": credentials["database"], 18 | "schema": credentials["schema"] # optional 19 | } 20 | 21 | # create a session object for the Snowpark session 22 | my_session = Session.builder.configs(connection_parameters_dict).create() 23 | 24 | # retrieve tables into data frames 25 | df_orders = my_session.table("ORDERS_STG") 26 | df_dim_date = my_session.table("DIM_DATE") 27 | 28 | # join the data frames 29 | df_orders_with_holiday_flg = df_orders.join(df_dim_date, df_orders.delivery_date == df_dim_date.day, 'left') 30 | 31 | # create a view from the joined data frames 32 | df_orders_with_holiday_flg.create_or_replace_view("ORDERS_HOLIDAY_FLG") 33 | 34 | # close the Snowpark session 35 | my_session.close() 36 | -------------------------------------------------------------------------------- /Chapter_06/Chapter_06_Part9.sql: -------------------------------------------------------------------------------- 1 | -- view the data in the ORDERS_STG staging table 2 | use schema SNOWPARK; 3 | select * from ORDERS_STG; 4 | 5 | --Listing 6.15 6 | -- join the orders table with the date dimension using SQL 7 | use schema SNOWPARK; 8 | select 9 | customer, order_date, delivery_date, baked_good_type, quantity, 10 | day, holiday_flg 11 | from ORDERS_STG 12 | left join DIM_DATE 13 | on delivery_date = day; 14 | 15 | -- view the data in the ORDERS_HOLIDAY_FLG view 16 | use schema SNOWPARK; 17 | select * from ORDERS_HOLIDAY_FLG; 18 | -------------------------------------------------------------------------------- /Chapter_06/Orders_2023-07-07.csv: -------------------------------------------------------------------------------- 1 | Customer,Order date,Delivery date,Baked good type,Quantity 2 | Coffee Pocket,2023-07-07,2023-07-10,Baguette,6 3 | Coffee Pocket,2023-07-07,2023-07-10,Bagel,12 4 | Coffee Pocket,2023-07-07,2023-07-10,English Muffin,16 5 | Coffee Pocket,2023-07-07,2023-07-10,Croissant,18 6 | Lily's Coffee,2023-07-07,2023-07-10,Bagel,20 7 | Lily's Coffee,2023-07-07,2023-07-10,White Loaf,4 8 | Lily's Coffee,2023-07-07,2023-07-10,Croissant,20 9 | Crave Coffee,2023-07-07,2023-07-10,Croissant,50 10 | Best Burgers,2023-07-07,2023-07-10,Hamburger Bun,75 11 | Page One Fast Food,2023-07-07,2023-07-10,Bagel,20 12 | Page One Fast Food,2023-07-07,2023-07-10,English Muffin,36 13 | Page One Fast Food,2023-07-07,2023-07-10,White Loaf,5 14 | Page One Fast Food,2023-07-07,2023-07-10,Hamburger Bun,50 15 | Jimmy's Diner,2023-07-07,2023-07-10,Bagel,18 16 | Jimmy's Diner,2023-07-07,2023-07-10,Rye Loaf,2 17 | Jimmy's Diner,2023-07-07,2023-07-10,White Loaf,7 18 | Jimmy's Diner,2023-07-07,2023-07-10,Whole Wheat Loaf,3 19 | Jimmy's Diner,2023-07-07,2023-07-10,Hamburger Bun,12 20 | Metro Fine Foods,2023-07-07,2023-07-10,Baguette,24 21 | Metro Fine Foods,2023-07-07,2023-07-10,Rye Loaf,8 22 | Metro Fine Foods,2023-07-07,2023-07-10,White 
Loaf,28 23 | Metro Fine Foods,2023-07-07,2023-07-10,Whole Wheat Loaf,22 24 | Coffee Pocket,2023-07-07,2023-07-11,Baguette,5 25 | Coffee Pocket,2023-07-07,2023-07-11,Bagel,9 26 | Coffee Pocket,2023-07-07,2023-07-11,English Muffin,12 27 | Coffee Pocket,2023-07-07,2023-07-11,Croissant,14 28 | Lily's Coffee,2023-07-07,2023-07-11,Bagel,15 29 | Lily's Coffee,2023-07-07,2023-07-11,White Loaf,3 30 | Lily's Coffee,2023-07-07,2023-07-11,Croissant,15 31 | Crave Coffee,2023-07-07,2023-07-11,Croissant,38 32 | Best Burgers,2023-07-07,2023-07-11,Hamburger Bun,56 33 | Page One Fast Food,2023-07-07,2023-07-11,Bagel,15 34 | Page One Fast Food,2023-07-07,2023-07-11,English Muffin,27 35 | Page One Fast Food,2023-07-07,2023-07-11,White Loaf,4 36 | Page One Fast Food,2023-07-07,2023-07-11,Hamburger Bun,38 37 | Jimmy's Diner,2023-07-07,2023-07-11,Bagel,14 38 | Jimmy's Diner,2023-07-07,2023-07-11,Rye Loaf,2 39 | Jimmy's Diner,2023-07-07,2023-07-11,White Loaf,5 40 | Jimmy's Diner,2023-07-07,2023-07-11,Whole Wheat Loaf,2 41 | Jimmy's Diner,2023-07-07,2023-07-11,Hamburger Bun,9 42 | Metro Fine Foods,2023-07-07,2023-07-11,Baguette,18 43 | Metro Fine Foods,2023-07-07,2023-07-11,Rye Loaf,6 44 | Metro Fine Foods,2023-07-07,2023-07-11,White Loaf,21 45 | Metro Fine Foods,2023-07-07,2023-07-11,Whole Wheat Loaf,17 46 | Coffee Pocket,2023-07-07,2023-07-12,Baguette,4 47 | Coffee Pocket,2023-07-07,2023-07-12,Bagel,5 48 | Coffee Pocket,2023-07-07,2023-07-12,English Muffin,6 49 | Coffee Pocket,2023-07-07,2023-07-12,Croissant,13 50 | Lily's Coffee,2023-07-07,2023-07-12,Bagel,4 51 | Lily's Coffee,2023-07-07,2023-07-12,White Loaf,3 52 | Crave Coffee,2023-07-07,2023-07-12,Croissant,11 53 | Best Burgers,2023-07-07,2023-07-12,Hamburger Bun,56 54 | Page One Fast Food,2023-07-07,2023-07-12,Bagel,5 55 | Page One Fast Food,2023-07-07,2023-07-12,English Muffin,27 56 | Page One Fast Food,2023-07-07,2023-07-12,White Loaf,2 57 | Page One Fast Food,2023-07-07,2023-07-12,Hamburger Bun,36 58 | Jimmy's Diner,2023-07-07,2023-07-12,Bagel,4 59 | Jimmy's Diner,2023-07-07,2023-07-12,Rye Loaf,2 60 | Jimmy's Diner,2023-07-07,2023-07-12,White Loaf,2 61 | Jimmy's Diner,2023-07-07,2023-07-12,Whole Wheat Loaf,1 62 | Metro Fine Foods,2023-07-07,2023-07-12,Baguette,1 63 | Metro Fine Foods,2023-07-07,2023-07-12,Rye Loaf,6 64 | Metro Fine Foods,2023-07-07,2023-07-12,White Loaf,5 65 | Metro Fine Foods,2023-07-07,2023-07-12,Whole Wheat Loaf,7 66 | -------------------------------------------------------------------------------- /Chapter_06/Orders_2023-07-08.csv: -------------------------------------------------------------------------------- 1 | Customer,Order date,Delivery date,Baked good type,Quantity 2 | Coffee Pocket,2023-07-08,2023-07-11,Baguette,5 3 | Coffee Pocket,2023-07-08,2023-07-11,Bagel,9 4 | Coffee Pocket,2023-07-08,2023-07-11,English Muffin,12 5 | Coffee Pocket,2023-07-08,2023-07-11,Croissant,14 6 | Lily's Coffee,2023-07-08,2023-07-11,Bagel,15 7 | Lily's Coffee,2023-07-08,2023-07-11,White Loaf,3 8 | Lily's Coffee,2023-07-08,2023-07-11,Croissant,15 9 | Crave Coffee,2023-07-08,2023-07-11,Croissant,38 10 | Best Burgers,2023-07-08,2023-07-11,Hamburger Bun,56 11 | Page One Fast Food,2023-07-08,2023-07-11,Bagel,15 12 | Page One Fast Food,2023-07-08,2023-07-11,English Muffin,27 13 | Page One Fast Food,2023-07-08,2023-07-11,White Loaf,4 14 | Page One Fast Food,2023-07-08,2023-07-11,Hamburger Bun,38 15 | Jimmy's Diner,2023-07-08,2023-07-11,Bagel,14 16 | Jimmy's Diner,2023-07-08,2023-07-11,Rye Loaf,2 17 | Jimmy's Diner,2023-07-08,2023-07-11,White Loaf,5 18 | 
Jimmy's Diner,2023-07-08,2023-07-11,Whole Wheat Loaf,2 19 | Jimmy's Diner,2023-07-08,2023-07-11,Hamburger Bun,9 20 | Metro Fine Foods,2023-07-08,2023-07-11,Baguette,18 21 | Metro Fine Foods,2023-07-08,2023-07-11,Rye Loaf,6 22 | Metro Fine Foods,2023-07-08,2023-07-11,White Loaf,21 23 | Metro Fine Foods,2023-07-08,2023-07-11,Whole Wheat Loaf,17 24 | Coffee Pocket,2023-07-08,2023-07-12,Baguette,4 25 | Coffee Pocket,2023-07-08,2023-07-12,Bagel,5 26 | Coffee Pocket,2023-07-08,2023-07-12,English Muffin,6 27 | Coffee Pocket,2023-07-08,2023-07-12,Croissant,13 28 | Lily's Coffee,2023-07-08,2023-07-12,Bagel,4 29 | Lily's Coffee,2023-07-08,2023-07-12,White Loaf,3 30 | Crave Coffee,2023-07-08,2023-07-12,Croissant,11 31 | Best Burgers,2023-07-08,2023-07-12,Hamburger Bun,56 32 | Page One Fast Food,2023-07-08,2023-07-12,Bagel,5 33 | Page One Fast Food,2023-07-08,2023-07-12,English Muffin,27 34 | Page One Fast Food,2023-07-08,2023-07-12,White Loaf,2 35 | Page One Fast Food,2023-07-08,2023-07-12,Hamburger Bun,36 36 | Jimmy's Diner,2023-07-08,2023-07-12,Bagel,4 37 | Jimmy's Diner,2023-07-08,2023-07-12,Rye Loaf,2 38 | Jimmy's Diner,2023-07-08,2023-07-12,White Loaf,2 39 | Jimmy's Diner,2023-07-08,2023-07-12,Whole Wheat Loaf,1 40 | Metro Fine Foods,2023-07-08,2023-07-12,Baguette,1 41 | Metro Fine Foods,2023-07-08,2023-07-12,Rye Loaf,6 42 | Metro Fine Foods,2023-07-08,2023-07-12,White Loaf,5 43 | Metro Fine Foods,2023-07-08,2023-07-12,Whole Wheat Loaf,7 44 | Coffee Pocket,2023-07-08,2023-07-13,Baguette,5 45 | Coffee Pocket,2023-07-08,2023-07-13,Bagel,5 46 | Coffee Pocket,2023-07-08,2023-07-13,English Muffin,12 47 | Coffee Pocket,2023-07-08,2023-07-13,Croissant,13 48 | Lily's Coffee,2023-07-08,2023-07-13,Bagel,15 49 | Lily's Coffee,2023-07-08,2023-07-13,White Loaf,2 50 | Lily's Coffee,2023-07-08,2023-07-13,Croissant,5 51 | Crave Coffee,2023-07-08,2023-07-13,Croissant,40 52 | Best Burgers,2023-07-08,2023-07-13,Hamburger Bun,62 53 | Page One Fast Food,2023-07-08,2023-07-13,Bagel,16 54 | Page One Fast Food,2023-07-08,2023-07-13,English Muffin,27 55 | Page One Fast Food,2023-07-08,2023-07-13,White Loaf,1 56 | Page One Fast Food,2023-07-08,2023-07-13,Hamburger Bun,49 57 | Jimmy's Diner,2023-07-08,2023-07-13,Bagel,16 58 | Jimmy's Diner,2023-07-08,2023-07-13,White Loaf,3 59 | Jimmy's Diner,2023-07-08,2023-07-13,Whole Wheat Loaf,2 60 | Jimmy's Diner,2023-07-08,2023-07-13,Hamburger Bun,10 61 | Metro Fine Foods,2023-07-08,2023-07-13,Baguette,20 62 | Metro Fine Foods,2023-07-08,2023-07-13,Rye Loaf,4 63 | Metro Fine Foods,2023-07-08,2023-07-13,White Loaf,20 64 | Metro Fine Foods,2023-07-08,2023-07-13,Whole Wheat Loaf,19 65 | -------------------------------------------------------------------------------- /Chapter_06/Orders_2023-07-09.csv: -------------------------------------------------------------------------------- 1 | Customer,Order date,Delivery date,Baked good type,Quantity 2 | Coffee Pocket,2023-07-09,2023-07-13,Baguette,5 3 | Coffee Pocket,2023-07-09,2023-07-13,Bagel,10 4 | Coffee Pocket,2023-07-09,2023-07-13,English Muffin,12 5 | Coffee Pocket,2023-07-09,2023-07-13,Croissant,13 6 | Lily's Coffee,2023-07-09,2023-07-13,Bagel,15 7 | Lily's Coffee,2023-07-09,2023-07-13,White Loaf,4 8 | Lily's Coffee,2023-07-09,2023-07-13,Croissant,15 9 | Crave Coffee,2023-07-09,2023-07-13,Croissant,40 10 | Best Burgers,2023-07-09,2023-07-13,Hamburger Bun,62 11 | Page One Fast Food,2023-07-09,2023-07-13,Bagel,16 12 | Page One Fast Food,2023-07-09,2023-07-13,English Muffin,27 13 | Page One Fast Food,2023-07-09,2023-07-13,White Loaf,2 14 
| Page One Fast Food,2023-07-09,2023-07-13,Hamburger Bun,49 15 | Jimmy's Diner,2023-07-09,2023-07-13,Bagel,16 16 | Jimmy's Diner,2023-07-09,2023-07-13,White Loaf,3 17 | Jimmy's Diner,2023-07-09,2023-07-13,Whole Wheat Loaf,2 18 | Jimmy's Diner,2023-07-09,2023-07-13,Hamburger Bun,10 19 | Metro Fine Foods,2023-07-09,2023-07-13,Baguette,20 20 | Metro Fine Foods,2023-07-09,2023-07-13,Rye Loaf,4 21 | Metro Fine Foods,2023-07-09,2023-07-13,White Loaf,20 22 | Metro Fine Foods,2023-07-09,2023-07-13,Whole Wheat Loaf,19 23 | Coffee Pocket,2023-07-09,2023-07-14,Baguette,6 24 | Coffee Pocket,2023-07-09,2023-07-14,Bagel,12 25 | Coffee Pocket,2023-07-09,2023-07-14,English Muffin,10 26 | Coffee Pocket,2023-07-09,2023-07-14,Croissant,12 27 | Lily's Coffee,2023-07-09,2023-07-14,Bagel,10 28 | Lily's Coffee,2023-07-09,2023-07-14,White Loaf,2 29 | Lily's Coffee,2023-07-09,2023-07-14,Croissant,16 30 | Crave Coffee,2023-07-09,2023-07-14,Croissant,32 31 | Best Burgers,2023-07-09,2023-07-14,Hamburger Bun,40 32 | Page One Fast Food,2023-07-09,2023-07-14,Bagel,15 33 | Page One Fast Food,2023-07-09,2023-07-14,English Muffin,20 34 | Page One Fast Food,2023-07-09,2023-07-14,White Loaf,2 35 | Page One Fast Food,2023-07-09,2023-07-14,Hamburger Bun,50 36 | Jimmy's Diner,2023-07-09,2023-07-14,Bagel,16 37 | Jimmy's Diner,2023-07-09,2023-07-14,White Loaf,3 38 | Jimmy's Diner,2023-07-09,2023-07-14,Whole Wheat Loaf,4 39 | Jimmy's Diner,2023-07-09,2023-07-14,Hamburger Bun,10 40 | Metro Fine Foods,2023-07-09,2023-07-14,Baguette,20 41 | Metro Fine Foods,2023-07-09,2023-07-14,Rye Loaf,4 42 | Metro Fine Foods,2023-07-09,2023-07-14,White Loaf,12 43 | Metro Fine Foods,2023-07-09,2023-07-14,Whole Wheat Loaf,16 44 | -------------------------------------------------------------------------------- /Chapter_06/connection_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "account" : "ab12345", 3 | "user" : "my_user", 4 | "password" : "my_pass", 5 | "role" : "SYSADMIN", 6 | "warehouse" : "BAKERY_WH", 7 | "database" : "BAKERY_DB", 8 | "schema" : "SNOWPARK" 9 | } -------------------------------------------------------------------------------- /Chapter_07/Chapter_07_Part1_customer_reviews.sql: -------------------------------------------------------------------------------- 1 | -- create a new schema in the BAKERY_DB database (see Chapter 2) 2 | use role SYSADMIN; 3 | create warehouse if not exists BAKERY_WH with warehouse_size = 'XSMALL'; 4 | create database if not exists BAKERY_DB; 5 | use database BAKERY_DB; 6 | create schema REVIEWS; 7 | use schema REVIEWS; 8 | 9 | -- use role ACCOUNTADMIN to grant privilege 10 | use role ACCOUNTADMIN; 11 | -- grant CREATE_NETWORK RULE, CREATE SECRET, and CREATE INTEGRATION privileges to role SYSADMIN 12 | grant create network rule on schema REVIEWS to role SYSADMIN; 13 | grant create secret on schema REVIEWS to role SYSADMIN; 14 | grant create integration on account to role SYSADMIN; 15 | -- switch back to the SYSADMIN role 16 | use role SYSADMIN; 17 | 18 | -- create a network rule 19 | use role SYSADMIN; 20 | create network rule YELP_API_NETWORK_RULE 21 | mode = EGRESS 22 | type = HOST_PORT 23 | value_list = ('api.yelp.com'); 24 | 25 | -- create a secret 26 | create secret YELP_API_TOKEN 27 | type = GENERIC_STRING 28 | secret_string = 'ab12DE...89XYZ'; 29 | 30 | -- grant usage on the secret to a custom role if that role will be using the secret 31 | grant read on secret YELP_API_TOKEN to role ; 32 | 33 | -- create an external access integration 34 
| create external access integration YELP_API_INTEGRATION 35 | allowed_network_rules = (YELP_API_NETWORK_RULE) 36 | allowed_authentication_secrets = (YELP_API_TOKEN) 37 | enabled = TRUE; 38 | 39 | -- create a UDF that calls the API endpoint using the external access integration and the secret 40 | --Listing 7.1 41 | create or replace function GET_CUSTOMER_REVIEWS(business_alias varchar) 42 | returns variant 43 | language python 44 | runtime_version = 3.10 45 | handler = 'get_reviews' 46 | external_access_integrations = (YELP_API_INTEGRATION) 47 | secrets = ('yelp_api_token' = YELP_API_TOKEN) 48 | packages = ('requests') 49 | AS 50 | --Listing 7.2 51 | $$ 52 | import _snowflake 53 | import requests 54 | 55 | def get_reviews(business_alias): 56 | api_key = _snowflake.get_generic_secret_string('yelp_api_token') 57 | url = f'''https://api.yelp.com/v3/businesses/{business_alias}/reviews''' 58 | response = requests.get( 59 | url=url, 60 | headers = {'Authorization': 'Bearer ' + api_key} 61 | ) 62 | return response.json() 63 | $$; 64 | 65 | -- select from the UDF 66 | select GET_CUSTOMER_REVIEWS('boulangerie-julien-paris-3'); 67 | 68 | -- select the value from the "reviews key" 69 | --Listing 7.3 70 | select GET_CUSTOMER_REVIEWS('boulangerie-julien-paris-3'):"reviews"; 71 | 72 | -- flatten the values of the "rating", "time_created", and "text" keys 73 | --Listing 7.5 74 | select 75 | value:"rating"::number as rating, 76 | value:"time_created"::timestamp as time_created, 77 | value:"text"::varchar as customer_review 78 | from table(flatten(input => 79 | GET_CUSTOMER_REVIEWS('boulangerie-julien-paris-3'):"reviews" 80 | )); 81 | 82 | -- create a table to store the customer reviews 83 | use schema REVIEWS; 84 | create table CUSTOMER_REVIEWS ( 85 | rating number, 86 | time_created timestamp, 87 | customer_review varchar 88 | ); 89 | 90 | -- insert the result of the previous query into the table 91 | insert into CUSTOMER_REVIEWS 92 | select 93 | value:"rating"::number as rating, 94 | value:"time_created"::timestamp as time_created, 95 | regexp_replace(value:"text"::varchar, 96 | '[^a-zA-Z0-9 .,!?-]+')::varchar as customer_review 97 | from table(flatten( 98 | input => GET_CUSTOMER_REVIEWS('boulangerie-julien-paris-3'):"reviews" 99 | )); 100 | 101 | 102 | -- select data from the table 103 | select * from CUSTOMER_REVIEWS; -------------------------------------------------------------------------------- /Chapter_07/Chapter_07_Part2_review_sentiment.sql: -------------------------------------------------------------------------------- 1 | -- grant the SNOWFLAKE.CORTEX_USER to the SYSADMIN role 2 | use role ACCOUNTADMIN; 3 | grant database role SNOWFLAKE.CORTEX_USER to role SYSADMIN; 4 | 5 | -- use the SYSADMIN role and the REVIEWS schema in the BAKERY_DB database 6 | use role SYSADMIN; 7 | use database BAKERY_DB; 8 | use schema REVIEWS; 9 | 10 | -- get the sentiment score from different examples of text 11 | select SNOWFLAKE.CORTEX.SENTIMENT('The service was excellent!'); 12 | select SNOWFLAKE.CORTEX.SENTIMENT('The bagel was stale.'); 13 | select SNOWFLAKE.CORTEX.SENTIMENT('I went to the bakery for lunch.'); 14 | 15 | -- map the sentiment score to Positive, Negative, and Neutral 16 | select 17 | rating, 18 | time_created, 19 | customer_review, 20 | SNOWFLAKE.CORTEX.SENTIMENT(customer_review) as sentiment_score, 21 | case 22 | when sentiment_score < -0.7 then 'Negative' 23 | when sentiment_score < 0.4 then 'Neutral' 24 | else 'Positive' 25 | end as sentiment 26 | from CUSTOMER_REVIEWS; 27 | 
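-- a minimal sketch (not part of the chapter script): the sentiment mapping above could be persisted
-- as a view so downstream queries don't repeat the CASE expression; the name REVIEW_SENTIMENT is an assumption
create or replace view REVIEW_SENTIMENT as
select
    rating,
    time_created,
    customer_review,
    SNOWFLAKE.CORTEX.SENTIMENT(customer_review) as sentiment_score,
    case
        when sentiment_score < -0.7 then 'Negative'
        when sentiment_score < 0.4 then 'Neutral'
        else 'Positive'
    end as sentiment
from CUSTOMER_REVIEWS;

select * from REVIEW_SENTIMENT;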
-------------------------------------------------------------------------------- /Chapter_07/Chapter_07_Part3_read_emails.sql: -------------------------------------------------------------------------------- 1 | --python -m pip install snowflake-ml-python 2 | use role SYSADMIN; 3 | use database BAKERY_DB; 4 | use schema REVIEWS; 5 | 6 | -- create a stored procedure that calls the Snowflake Cortex Complete model 7 | -- it then converts the resulting CSV output into a data frame and saves it to a table 8 | create or replace procedure READ_EMAIL_PROC(email_content varchar) 9 | returns table() 10 | language python 11 | runtime_version = 3.10 12 | handler = 'get_order_info_from_email' 13 | packages = ('snowflake-snowpark-python', 'snowflake-ml-python') 14 | AS 15 | $$ 16 | import _snowflake 17 | import snowflake.snowpark as snowpark 18 | from snowflake.snowpark.types import StructType, StructField, DateType, StringType, IntegerType 19 | from snowflake.cortex import Complete 20 | 21 | def get_order_info_from_email(session: snowpark.Session, email_content): 22 | 23 | prompt = f"""You are a bakery employee, reading customer emails asking for deliveries. \ 24 | Please read the email at the end of this text and extract information about the ordered items. \ 25 | Format the information in CSV using the following columns: customer, order_date, delivery_date, item, and quantity. \ 26 | Format the date as YYYY-MM-DD. If no year is given, assume the current year. \ 27 | Use the current date in the format YYYY-MM-DD for the order date. \ 28 | Items should be in this list: [white loaf, rye loaf, baguette, bagel, croissant, chocolate muffin, blueberry muffin]. \ 29 | The content of the email follows this line. \n {email_content}""" 30 | 31 | csv_output = Complete('snowflake-arctic', prompt) 32 | 33 | schema = StructType([ 34 | StructField("CUSTOMER", StringType(), False), 35 | StructField("ORDER_DATE", DateType(), False), 36 | StructField("DELIVERY_DATE", DateType(), False), 37 | StructField("ITEM", StringType(), False), 38 | StructField("QUANTITY", IntegerType(), False) 39 | ]) 40 | 41 | orders_df = session.create_dataframe([x.split(',') for x in csv_output.split("\n")][1:], schema) 42 | orders_df.write.mode("append").save_as_table('COLLECTED_ORDERS_FROM_EMAIL') 43 | 44 | return orders_df 45 | $$; 46 | 47 | -- execute the stored procedure and provide a sample of an email content 48 | call READ_EMAIL_PROC('Hello, please deliver 6 loaves of white bread on Tuesday, September 5. On Wednesday, September 6, we need 16 bagels. Thanks, Lilys Coffee'); 49 | 50 | -- select from the table to verify that the csv was written to the table 51 | select * from COLLECTED_ORDERS_FROM_EMAIL; 52 | 53 | -- a few more sample email contents to test the stored procedure 54 | call READ_EMAIL_PROC('Hi again. At Metro Fine Foods, we are renewing our order for Thursday, September 7. We need 20 baguettes, 16 croissants, and a dozen blueberry muffins. Have a nice day!'); 55 | 56 | call READ_EMAIL_PROC('Greetings! We loved your French bread last week. Please deliver 10 more tomorrow. Cheers from your friends at Page One Fast Food'); 57 | 58 | call READ_EMAIL_PROC('Do you deliver pizza? If so, send two this afternoon. If not, then some bagels should do. 
Best, Jimmys Diner'); 59 | -------------------------------------------------------------------------------- /Chapter_08/Chapter_08.sql: -------------------------------------------------------------------------------- 1 | use role ACCOUNTADMIN; 2 | -- Get the “Sample Harmonized Data for Top CPG Retailers and Distributors” listing from the Snowflake Marketplace 3 | -- Save the listing into a database named CPG_RETAILERS_AND_DISTRIBUTORS 4 | 5 | -- grant usage on the database to the SYSADMIN role if you haven't granted it already when getting the data 6 | grant imported privileges on database CPG_RETAILERS_AND_DISTRIBUTORS to role SYSADMIN; 7 | 8 | -- query that selects individual stores, converts their latitude and longitude into a geography data type 9 | -- and calculates the distance in kilometers from Dayton, Ohio 10 | -- results are limited to 5 rows for better performance while developing the query 11 | select distinct 12 | store_id, 13 | store_latitude, 14 | store_longitude, 15 | TO_GEOGRAPHY( 16 | 'Point('||store_longitude||' '||store_latitude||')' 17 | ) as store_loc_geo, 18 | ST_DISTANCE( 19 | TO_GEOGRAPHY('Point(-84.19 39.76)'), store_loc_geo 20 | )/1000 as distance_km 21 | from CPG_RETAILERS_AND_DISTRIBUTORS.PUBLIC.HARMONIZED_RETAILER_SALES 22 | limit 5; 23 | 24 | -- create a new schema in the BAKERY_DB database (see Chapter 2) 25 | use role SYSADMIN; 26 | create warehouse if not exists BAKERY_WH with warehouse_size = 'XSMALL'; 27 | use warehouse BAKERY_WH; 28 | create database if not exists BAKERY_DB; 29 | use database BAKERY_DB; 30 | create schema RETAIL_ANALYSIS; 31 | use schema RETAIL_ANALYSIS; 32 | 33 | -- create a table by selecting all columns from HARMONIZED_RETAIL_SALES from the shared database 34 | -- adding each store’s location in a geography data type and the distance between the store and Dayton, Ohio 35 | create table RETAILER_SALES as 36 | select *, 37 | TO_GEOGRAPHY( 38 | 'Point('||store_longitude||' '||store_latitude||')' 39 | ) as store_loc_geo, 40 | ST_DISTANCE( 41 | TO_GEOGRAPHY('Point(-84.19 39.76)'), store_loc_geo 42 | )/1000 as distance_km 43 | from CPG_RETAILERS_AND_DISTRIBUTORS.PUBLIC.HARMONIZED_RETAILER_SALES; 44 | 45 | -- select top 100 stores that are closest to the bakery's location 46 | select distinct 47 | store_id, 48 | distance_km 49 | from RETAILER_SALES 50 | order by distance_km 51 | limit 100; 52 | 53 | -- chose store_id 392366678147865718 to perform further analysis 54 | 55 | -- select each product sold in the chosen store and the total quantity sold 56 | -- Listing 8.1 57 | select 58 | product_id, 59 | sum(sales_quantity) as tot_quantity 60 | from RETAILER_SALES 61 | where store_id = 392366678147865718 62 | group by product_id; 63 | 64 | -- count the rows in the entire table and the number of filtered rows 65 | select 66 | 'Total rows' as filtering_type, 67 | count(*) as row_cnt 68 | from retailer_sales 69 | union all 70 | select 71 | 'Filtered rows' as filtering_type, 72 | count(*) as row_cnt 73 | from retailer_sales 74 | where store_id = 392366678147865718; 75 | 76 | -- view clustering information 77 | select SYSTEM$CLUSTERING_INFORMATION('retailer_sales', '(store_id)'); 78 | 79 | -- add a clustering key 80 | alter table RETAILER_SALES cluster by (store_id); 81 | 82 | -- monitor the clustering process 83 | -- grant privilege first 84 | use role ACCOUNTADMIN; 85 | grant MONITOR USAGE ON ACCOUNT to role SYSADMIN; 86 | use role SYSADMIN; 87 | select * 88 | from table(information_schema.automatic_clustering_history( 89 | 
date_range_start=>dateadd(D, -1, current_date), 90 | table_name=>'BAKERY_DB.RETAIL_ANALYSIS.RETAILER_SALES')); 91 | 92 | -- execute the query from Listing 8.1 again 93 | select 94 | product_id, 95 | sum(sales_quantity) as tot_quantity 96 | from RETAILER_SALES 97 | where store_id = 392366678147865718 98 | group by product_id; 99 | 100 | -- view the clustering information again 101 | select SYSTEM$CLUSTERING_INFORMATION('retailer_sales', '(store_id)'); 102 | 103 | -- sum of the sold quantity of a chosen product in each store 104 | -- if you don’t see a product with an ID value of 4120371332641752996, select a different product 105 | -- Listing 8.4 106 | select store_id, sum(sales_quantity) as tot_quantity 107 | from RETAILER_SALES 108 | where product_id = 4120371332641752996 109 | group by store_id; 110 | 111 | -- add search optimization 112 | alter table RETAILER_SALES add search optimization on equality(product_id); 113 | 114 | -- view the search optimization parameters 115 | show tables like 'RETAILER_SALES'; 116 | 117 | -- grant the GOVERNANCE_VIEWER database role to SYSADMIN 118 | use role ACCOUNTADMIN; 119 | grant database role SNOWFLAKE.GOVERNANCE_VIEWER to role SYSADMIN; 120 | use role SYSADMIN; 121 | 122 | -- select the longest running queries from query history 123 | select 124 | query_id, 125 | query_text, 126 | partitions_scanned, 127 | partitions_total, 128 | total_elapsed_time 129 | from SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY 130 | where TO_DATE(start_time) > DATEADD(day,-1,TO_DATE(CURRENT_TIMESTAMP())) 131 | order by total_elapsed_time desc 132 | limit 50; 133 | -------------------------------------------------------------------------------- /Chapter_09/Chapter_09.sql: -------------------------------------------------------------------------------- 1 | -- This chapter assumes that you have the “Sample Harmonized Data for Top CPG Retailers and Distributors” listing from the Snowflake Marketplace (see Chapter 8) 2 | 3 | -- use the RETAIL_ANALYSIS schema in the BAKERY_DB database (see Chapter 8) 4 | use role SYSADMIN; 5 | use database BAKERY_DB; 6 | use schema RETAIL_ANALYSIS; 7 | 8 | -- you must have the RETAILER_SALES table (see Chapter 8) 9 | 10 | -- create a set of virtual warehouses in increasing sizes 11 | create warehouse BAKERY_WH_XSMALL with warehouse_size = 'xsmall'; 12 | create warehouse BAKERY_WH_SMALL with warehouse_size = 'small'; 13 | create warehouse BAKERY_WH_MEDIUM with warehouse_size = 'medium'; 14 | create warehouse BAKERY_WH_LARGE with warehouse_size = 'large'; 15 | 16 | -- construct a complex query that 17 | -- - selects the total sold quantity of each product in each store 18 | -- - adds a condition to include only stores which sell more than 100 distinct products 19 | -- - sorts the results by the distance 20 | -- Listing 9.1 21 | select 22 | store_id, 23 | distance_km, 24 | product_id, 25 | sum(sales_quantity) as total_quantity 26 | from RETAILER_SALES 27 | where store_id in ( 28 | select store_id 29 | from ( 30 | select store_id, 31 | count(distinct product_id) as product_cnt 32 | from RETAILER_SALES 33 | group by store_id 34 | having product_cnt > 100 35 | ) 36 | ) 37 | group by store_id, distance_km, product_id 38 | order by distance_km; 39 | 40 | -- use the extra small warehouse 41 | use warehouse BAKERY_WH_XSMALL; 42 | -- execute the query above (Listing 9.1) 43 | -- open the query profile after executing 44 | -- take note of the Total execution time and Bytes spilled to local storage statistics 45 | 46 | -- use the small warehouse 47 | use warehouse 
BAKERY_WH_SMALL; 48 | -- execute the query above (Listing 9.1) 49 | -- open the query profile after executing 50 | -- notice that the query results were reused 51 | 52 | -- set the session so that it doesn't reuse query results - for testing only 53 | alter session set use_cached_result = FALSE; 54 | 55 | -- still using the small warehouse, execute the query above (Listing 9.1) 56 | -- open the query profile after executing 57 | -- take note of the Total execution time and Bytes spilled to local storage statistics 58 | 59 | -- use the medium warehouse 60 | use warehouse BAKERY_WH_MEDIUM; 61 | -- execute the query above (Listing 9.1) 62 | -- open the query profile after executing 63 | -- take note of the Total execution time and Bytes spilled to local storage statistics 64 | 65 | -- use the large warehouse 66 | use warehouse BAKERY_WH_LARGE; 67 | -- execute the query above (Listing 9.1) 68 | -- open the query profile after executing 69 | -- take note of the Total execution time and Bytes spilled to local storage statistics 70 | 71 | 72 | -- reduce spilling 73 | -- use the extra small warehouse 74 | use warehouse BAKERY_WH_XSMALL; 75 | 76 | -- add filter to the previous query (Listing 9.1) to select only stores that within 1000 km 77 | -- Listing 9.2 78 | select 79 | store_id, 80 | distance_km, 81 | product_id, 82 | sum(sales_quantity) as total_quantity 83 | from RETAILER_SALES 84 | where store_id in ( 85 | select store_id 86 | from ( 87 | select store_id, 88 | count(distinct product_id) as product_cnt 89 | from RETAILER_SALES 90 | where distance_km < 1000 91 | group by store_id 92 | having product_cnt > 100 93 | ) 94 | ) 95 | group by store_id, distance_km, product_id 96 | order by distance_km; 97 | 98 | -- open the query profile after executing 99 | -- examine the Bytes spilled to local storage statistic - it should be less than before adding the filter 100 | 101 | -- set the session parameter to its original value that allows reusing query results 102 | alter session set use_cached_result = TRUE; 103 | 104 | 105 | -- count the number of records 106 | select count(*) from RETAILER_SALES; 107 | -- open the query profile after executing 108 | -- note that it is a metadata operation 109 | 110 | 111 | -- change the AUTO_SUSPEND parameter to 5 minutes (300 seconds) 112 | alter warehouse BAKERY_WH_XSMALL set AUTO_SUSPEND = 300; 113 | 114 | 115 | -- grant the USAGE_VIEWER database role to SYSADMIN 116 | use role ACCOUNTADMIN; 117 | grant database role SNOWFLAKE.USAGE_VIEWER to role SYSADMIN; 118 | use role SYSADMIN; 119 | 120 | -- summarize the queuing time and the total execution time by each warehouse by day for the past 7 days 121 | select 122 | to_date(start_time) as start_date, 123 | warehouse_name, 124 | sum(avg_running) as total_running, 125 | sum(avg_queued_load) as total_queued 126 | from SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_LOAD_HISTORY 127 | where TO_DATE(start_time) > DATEADD(day,-7,TO_DATE(CURRENT_TIMESTAMP())) 128 | group by all 129 | order by 1, 2; 130 | 131 | -- limit the number of concurrently running queries to 6 132 | alter warehouse BAKERY_WH_LARGE set MAX_CONCURRENCY_LEVEL = 6; 133 | -------------------------------------------------------------------------------- /Chapter_10/Chapter_10_Part1_role_based_access_control.sql: -------------------------------------------------------------------------------- 1 | use role SYSADMIN; 2 | create warehouse if not exists BAKERY_WH with warehouse_size = 'XSMALL'; 3 | create database if not exists BAKERY_DB; 4 | use database BAKERY_DB; 5 | 6 | 
-- create schemas with managed access 7 | create schema RAW with managed access; 8 | create schema RPT with managed access; 9 | 10 | -- using the USERADMIN role (because this role has the CREATE ROLE privilege) 11 | use role USERADMIN; 12 | 13 | -- create the access roles for full access and for read-only access 14 | create role BAKERY_FULL; 15 | create role BAKERY_READ; 16 | 17 | -- create the functional roles 18 | create role DATA_ENGINEER; 19 | create role DATA_ANALYST; 20 | 21 | -- using the SECURITYADMIN role (because this role has the MANAGE GRANTS privilege) 22 | use role SECURITYADMIN; 23 | 24 | -- grant privileges to each of the access roles 25 | 26 | -- grant full privileges on database BAKERY_DB to the BAKERY_FULL role 27 | grant usage on database BAKERY_DB to role BAKERY_FULL; 28 | grant usage on all schemas in database BAKERY_DB to role BAKERY_FULL; 29 | grant all on schema BAKERY_DB.RAW to role BAKERY_FULL; 30 | grant all on schema BAKERY_DB.RPT to role BAKERY_FULL; 31 | 32 | -- grant read-only privileges on database BAKERY_DB to the BAKERY_READ role 33 | grant usage on database BAKERY_DB to role BAKERY_READ; 34 | grant usage on all schemas in database BAKERY_DB to role BAKERY_READ; 35 | grant select on all tables in schema BAKERY_DB.RPT to role BAKERY_READ; 36 | grant select on all views in schema BAKERY_DB.RPT to role BAKERY_READ; 37 | 38 | -- grant future privileges 39 | grant select on future tables in schema BAKERY_DB.RPT to role BAKERY_READ; 40 | grant select on future views in schema BAKERY_DB.RPT to role BAKERY_READ; 41 | 42 | -- grant access roles to functional roles 43 | -- grant the BAKERY_FULL role to the DATA_ENGINEER role 44 | grant role BAKERY_FULL to role DATA_ENGINEER; 45 | -- grant the BAKERY_READ role to the DATA_ANALYST role 46 | grant role BAKERY_READ to role DATA_ANALYST; 47 | 48 | -- grant both functional roles to the SYSADMIN role 49 | grant role DATA_ENGINEER to role SYSADMIN; 50 | grant role DATA_ANALYST to role SYSADMIN; 51 | 52 | -- grant the functional roles to the users who perform those business functions 53 | -- in this exercise we grant both functional roles to our current user to be able to test them 54 | 55 | set my_current_user = current_user(); 56 | grant role DATA_ENGINEER to user IDENTIFIER($my_current_user); 57 | grant role DATA_ANALYST to user IDENTIFIER($my_current_user); 58 | 59 | -- grant usage on the BAKERY_WH warehouse to the functional roles 60 | grant usage on warehouse BAKERY_WH to role DATA_ENGINEER; 61 | grant usage on warehouse BAKERY_WH to role DATA_ANALYST; 62 | 63 | -- to test, use the DATA_ENGINEER role to create a table in the RAW schema and insert some sample values 64 | use role DATA_ENGINEER; 65 | use warehouse BAKERY_WH; 66 | use database BAKERY_DB; 67 | use schema RAW; 68 | 69 | create table EMPLOYEE ( 70 | id integer, 71 | name varchar, 72 | home_address varchar, 73 | department varchar, 74 | hire_date date 75 | ); 76 | 77 | insert into EMPLOYEE values 78 | (1001, 'William Jones', '5170 Arcu St.', 'Bread', '2020-02-01'), 79 | (1002, 'Alexander North', '261 Ipsum Rd.', 'Pastry', '2021-04-01'), 80 | (1003, 'Jennifer Navarro', '880 Dictum Ave.', 'Pastry', '2019-08-01'), 81 | (1004, 'Sandra Perkins', '55 Velo St.', 'Bread', '2022-05-01'); 82 | 83 | -- use the DATA_ANALYST role to select from the table in the RAW schema 84 | use role DATA_ANALYST; 85 | select * from RAW.EMPLOYEE; 86 | -- should not succeed because the DATA_ANALYST has no privileges in the RAW schema 87 | 88 | -- switch to the DATA_ENGINEER role and 
create a view in the RPT schema 89 | use role DATA_ENGINEER; 90 | create view RPT.EMPLOYEE as 91 | select id, name, home_address, department, hire_date 92 | from RAW.EMPLOYEE; 93 | 94 | -- switch to the DATA_ANALYST role and select from the view in the RPT schema 95 | use role DATA_ANALYST; 96 | select * from RPT.EMPLOYEE; 97 | -- should return values -------------------------------------------------------------------------------- /Chapter_10/Chapter_10_Part2_row_access_policy.sql: -------------------------------------------------------------------------------- 1 | use role SYSADMIN; 2 | use database BAKERY_DB; 3 | 4 | -- create schema with managed access using the SYSADMIN role 5 | create schema DG with managed access; 6 | grant all on schema DG to role BAKERY_FULL; 7 | 8 | use role USERADMIN; 9 | -- create the functional roles 10 | create role DATA_ANALYST_BREAD; 11 | create role DATA_ANALYST_PASTRY; 12 | 13 | -- grant the BAKERY_READ access role to functional roles 14 | grant role BAKERY_READ to role DATA_ANALYST_BREAD; 15 | grant role BAKERY_READ to role DATA_ANALYST_PASTRY; 16 | 17 | -- grant the functional roles to the users who perform those business functions 18 | -- in this exercise we grant both functional roles to our current user to be able to test them 19 | 20 | set my_current_user = current_user(); 21 | grant role DATA_ANALYST_BREAD to user IDENTIFIER($my_current_user); 22 | grant role DATA_ANALYST_PASTRY to user IDENTIFIER($my_current_user); 23 | 24 | -- grant usage on the BAKERY_WH warehouse to the functional roles 25 | use role SYSADMIN; 26 | grant usage on warehouse BAKERY_WH to role DATA_ANALYST_BREAD; 27 | grant usage on warehouse BAKERY_WH to role DATA_ANALYST_PASTRY; 28 | 29 | -- to keep the exercise simple, the DATA_ENGINEER role creates and applies row access policies 30 | -- grant privileges to create and apply row access policies to the DATA_ENGINEER role 31 | use role ACCOUNTADMIN; 32 | grant create row access policy on schema BAKERY_DB.DG to role DATA_ENGINEER; 33 | grant apply row access policy on account to role DATA_ENGINEER; 34 | 35 | -- use the DATA_ENGINEER role to create the row access policy 36 | use role DATA_ENGINEER; 37 | use warehouse BAKERY_WH; 38 | use database BAKERY_DB; 39 | use schema DG; 40 | 41 | create row access policy DG.RAP_BUSINES_UNIT 42 | as (DEPARTMENT varchar) 43 | returns boolean -> 44 | case 45 | -- return TRUE when the role is the creator of the row access policy 46 | when (is_role_in_session('DATA_ENGINEER')) 47 | then TRUE 48 | -- grant access based on the mapping of role and department 49 | when (is_role_in_session('DATA_ANALYST_BREAD')) and DEPARTMENT = 'Bread' 50 | then TRUE 51 | when (is_role_in_session('DATA_ANALYST_PASTRY')) and DEPARTMENT = 'Pastry' 52 | then TRUE 53 | -- otherwise return FALSE 54 | else FALSE 55 | end; 56 | 57 | -- apply the row access policy to the EMPLOYEE view in the RPT schema 58 | alter view BAKERY_DB.RPT.EMPLOYEE add row access policy RAP_BUSINES_UNIT on (DEPARTMENT); 59 | 60 | -- test to verify that the row access policy is working as expected 61 | -- the DATA_ANALYST_BREAD role should see only the data in the 'Bread' department 62 | use role DATA_ANALYST_BREAD; 63 | select * from BAKERY_DB.RPT.EMPLOYEE; 64 | 65 | -- the DATA_ANALYST_PASTRY role should see only the data in the 'Pastry' department 66 | use role DATA_ANALYST_PASTRY; 67 | select * from BAKERY_DB.RPT.EMPLOYEE; 68 | 69 | -------------------------------------------------------------------------------- 
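A minimal sketch (not part of the chapter scripts) for checking which policies are attached to the EMPLOYEE view, using the INFORMATION_SCHEMA.POLICY_REFERENCES table function; it assumes the DATA_ENGINEER role, as the creator of the row access policy, can read this metadata.

-- verify which policies are currently attached to the view
use role DATA_ENGINEER;
use database BAKERY_DB;
select *
from table(information_schema.policy_references(
    ref_entity_name => 'BAKERY_DB.RPT.EMPLOYEE',
    ref_entity_domain => 'VIEW'));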
/Chapter_10/Chapter_10_Part3_masking_policy.sql: -------------------------------------------------------------------------------- 1 | -- to keep the exercise simple, the DATA_ENGINEER role creates and applies masking policies 2 | -- grant privileges to create and apply masking policies to the DATA_ENGINEER role 3 | use role ACCOUNTADMIN; 4 | grant create masking policy on schema BAKERY_DB.DG to role DATA_ENGINEER; 5 | grant apply masking policy on account to role DATA_ENGINEER; 6 | 7 | -- use the DATA_ENGINEER role to create the masking policy 8 | use role DATA_ENGINEER; 9 | use warehouse BAKERY_WH; 10 | use database BAKERY_DB; 11 | use schema DG; 12 | 13 | -- create a masking policy that masks the addr column when the current role is not DATA_ENGINEER 14 | create masking policy ADDRESS_MASK 15 | as (addr varchar) 16 | returns varchar -> 17 | case 18 | when current_role() in ('DATA_ENGINEER') then addr 19 | else '***' 20 | end; 21 | 22 | -- apply the masking policy to the EMPLOYEE view in the RPT schema 23 | alter view BAKERY_DB.RPT.EMPLOYEE 24 | modify column HOME_ADDRESS 25 | set masking policy ADDRESS_MASK; 26 | 27 | -- to test, use one of the data analyst roles 28 | -- should return masked data 29 | use role DATA_ANALYST_BREAD; 30 | select * from BAKERY_DB.RPT.EMPLOYEE; 31 | 32 | -- then use the DATA_ENGINEER role 33 | -- should return unmasked data 34 | use role DATA_ENGINEER; 35 | select * from BAKERY_DB.RPT.EMPLOYEE; 36 | 37 | -------------------------------------------------------------------------------- /Chapter_11/Chapter_11_Part1_create_schema.sql: -------------------------------------------------------------------------------- 1 | -- refer to Chapter_10_Part1_role_based_access_control.sql 2 | 3 | use role SYSADMIN; 4 | use database BAKERY_DB; 5 | 6 | -- create schema with managed access using the SYSADMIN role 7 | create schema EXT with managed access; 8 | create schema STG with managed access; 9 | create schema DWH with managed access; 10 | create schema MGMT with managed access; 11 | 12 | -- using the SECURITYADMIN role (because this role has the MANAGE GRANTS privilege) 13 | use role SECURITYADMIN; 14 | 15 | -- grant full privileges on all schemas to the BAKERY_FULL role 16 | grant all on schema BAKERY_DB.EXT to role BAKERY_FULL; 17 | grant all on schema BAKERY_DB.STG to role BAKERY_FULL; 18 | grant all on schema BAKERY_DB.DWH to role BAKERY_FULL; 19 | grant all on schema BAKERY_DB.MGMT to role BAKERY_FULL; 20 | 21 | -- grant read-only privileges on the MGMT schema to the BAKERY_READ role 22 | grant select on all tables in schema BAKERY_DB.MGMT to role BAKERY_READ; 23 | grant select on all views in schema BAKERY_DB.MGMT to role BAKERY_READ; 24 | 25 | -- grant future privileges 26 | grant select on future tables in schema BAKERY_DB.MGMT to role BAKERY_READ; 27 | grant select on future views in schema BAKERY_DB.MGMT to role BAKERY_READ; -------------------------------------------------------------------------------- /Chapter_11/Chapter_11_Part2_EXT_layer.sql: -------------------------------------------------------------------------------- 1 | -- create a storage integration object named PARK_INN_INTEGRATION as described in Chapter 4 2 | -- if you created the storage integration already in Chapter 4, no need to recreate it 3 | -- grant usage on the storage integration object to the DATA_ENGINEER role 4 | use role ACCOUNTADMIN; 5 | grant usage on integration PARK_INN_INTEGRATION to role DATA_ENGINEER; 6 | 7 | -- use the DATA_ENGINEER role going forward 8 | use role 
DATA_ENGINEER; 9 | use warehouse BAKERY_WH; 10 | use database BAKERY_DB; 11 | use schema EXT; 12 | 13 | -- create an external stage named JSON_ORDERS_STAGE using the PARK_INN_INTEGRATION as described in Chapter 4 14 | -- be sure to create the external stage with the JSON file format, eg. file_format = (type = json) 15 | -- upload the json files Orders_2023-09-01.json and Orders_2023-09-04.json to the object storage location used in the stage 16 | 17 | -- view files in the stage 18 | list @JSON_ORDERS_STAGE; 19 | 20 | -- create the extract table for the orders in raw (json) format 21 | create table JSON_ORDERS_EXT ( 22 | customer_orders variant, 23 | source_file_name varchar, 24 | load_ts timestamp 25 | ); 26 | 27 | -- copy data from the stage into the extract table 28 | copy into JSON_ORDERS_EXT 29 | from ( 30 | select 31 | $1, 32 | metadata$filename, 33 | current_timestamp() 34 | from @JSON_ORDERS_STAGE 35 | ) 36 | on_error = abort_statement 37 | ; 38 | 39 | select * from JSON_ORDERS_EXT; 40 | -- output should show two rows, one for each file you uploaded -------------------------------------------------------------------------------- /Chapter_11/Chapter_11_Part3_STG_layer_from_json_files.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema STG; 5 | 6 | -- create a view in the STG schema flattening the json into a relational structure 7 | -- refer to Chapter 4 for an explanation of how the view is constructed 8 | create view JSON_ORDERS_STG as 9 | select 10 | E.customer_orders:"Customer"::varchar as customer, 11 | E.customer_orders:"Order date"::date as order_date, 12 | CO.value:"Delivery date"::date as delivery_date, 13 | DO.value:"Baked good type":: varchar as baked_good_type, 14 | DO.value:"Quantity"::number as quantity, 15 | source_file_name, 16 | load_ts 17 | from EXT.JSON_ORDERS_EXT E, 18 | lateral flatten (input => customer_orders:"Orders") CO, 19 | lateral flatten (input => CO.value:"Orders by day") DO; 20 | 21 | -- view data in the view 22 | select * 23 | from JSON_ORDERS_STG; 24 | -------------------------------------------------------------------------------- /Chapter_11/Chapter_11_Part4_STG_layer_from_database.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema STG; 5 | 6 | -- create tables in the STG schema, simulating tables populated from the source system using a data integration tool or custom solution 7 | create table PARTNER ( 8 | partner_id integer, 9 | partner_name varchar, 10 | address varchar, 11 | rating varchar, 12 | valid_from date 13 | ); 14 | 15 | insert into PARTNER values 16 | (101, 'Coffee Pocket', '501 Courtney Wells', 'A', '2023-06-01'), 17 | (102, 'Lily''s Coffee', '2825 Joshua Forest', 'A', '2023-06-01'), 18 | (103, 'Crave Coffee', '538 Hayden Port', 'B', '2023-06-01'), 19 | (104, 'Best Burgers', '790 Friedman Valley', 'A', '2023-06-01'), 20 | (105, 'Page One Fast Food', '44864 Amber Walk', 'B', '2023-06-01'), 21 | (106, 'Jimmy''s Diner', '2613 Scott Mountains', 'A', '2023-06-01'), 22 | (107, 'Metro Fine Foods', '520 Castillo Valley', 'A', '2023-06-01'), 23 | (108, 'New Bistro', '494 Terry Spurs', 'A', '2023-06-01'), 24 | (109, 'Park Inn', '3692 Nelson Turnpike', 'A', '2023-06-01'), 25 | (110, 'Chef Supplies', '870 Anthony Hill', 'A', '2023-06-01'), 26 | (111, 'Farm Fresh', '23633 Melanie 
Ranch', 'A', '2023-06-01'), 27 | (112, 'Murphy Mill', '700 Darren Centers', 'A', '2023-06-01'); 28 | 29 | select * from PARTNER; 30 | 31 | create table PRODUCT ( 32 | product_id integer, 33 | product_name varchar, 34 | category varchar, 35 | min_quantity integer, 36 | price number(18,2), 37 | valid_from date 38 | ); 39 | 40 | insert into PRODUCT values 41 | (1, 'Baguette', 'Bread', 2, 2.5, '2023-06-01'), 42 | (2, 'Bagel', 'Bread', 6, 1.3, '2023-06-01'), 43 | (3, 'English Muffin', 'Bread', 6, 1.2, '2023-06-01'), 44 | (4, 'Croissant', 'Pastry', 4, 2.1, '2023-06-01'), 45 | (5, 'White Loaf', 'Bread', 1, 1.8, '2023-06-01'), 46 | (6, 'Hamburger Bun', 'Bread', 10, 0.9, '2023-06-01'), 47 | (7, 'Rye Loaf', 'Bread', 1, 3.2, '2023-06-01'), 48 | (8, 'Whole Wheat Loaf', 'Bread', 1, 2.8, '2023-06-01'), 49 | (9, 'Muffin', 'Pastry', 12, 3.0, '2023-06-01'), 50 | (10, 'Cinnamon Bun', 'Pastry', 6, 3.4, '2023-06-01'), 51 | (11, 'Blueberry Muffin', 'Pastry', 12, 3.6, '2023-06-01'), 52 | (12, 'Chocolate Muffin', 'Pastry', 12, 3.6, '2023-06-01'); 53 | 54 | select * from PRODUCT; 55 | -------------------------------------------------------------------------------- /Chapter_11/Chapter_11_Part5_DWH_layer.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema DWH; 5 | 6 | -- create views PARTNER and PRODUCT in the DWH schema that select data from the STG schema 7 | create view PARTNER as 8 | select partner_id, partner_name, address, rating 9 | from STG.PARTNER; 10 | 11 | create view PRODUCT as 12 | select product_id, product_name, category, min_quantity, price, valid_from 13 | from STG.PRODUCT; 14 | 15 | -- create view ORDERS in the DWH schema that adds primary keys from the PARTNER and PRODUCT tables 16 | create view ORDERS as 17 | select PT.partner_id, PRD.product_id, ORD.delivery_date, 18 | ORD.order_date, ORD.quantity 19 | from STG.JSON_ORDERS_STG ORD 20 | inner join STG.PARTNER PT 21 | on PT.partner_name = ORD.customer 22 | inner join STG.PRODUCT PRD 23 | on PRD.product_name = ORD.baked_good_type; 24 | 25 | select * from ORDERS; 26 | -------------------------------------------------------------------------------- /Chapter_11/Chapter_11_Part6_MGMT_layer.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema MGMT; 5 | 6 | -- create view in the MGMT schema that summarizes orders by delivery date and baked good type, adding the baked good category 7 | create view ORDERS_SUMMARY as 8 | select ORD.delivery_date, PRD.product_name, PRD.category, sum(ORD.quantity) as total_quantity 9 | from dwh.ORDERS ORD 10 | left join dwh.PRODUCT PRD 11 | on ORD.product_id = PRD.product_id 12 | group by all; 13 | 14 | -- use the DATA_ANALYST role to select data from the summary view 15 | use role DATA_ANALYST; 16 | use warehouse BAKERY_WH; 17 | use database BAKERY_DB; 18 | use schema MGMT; 19 | 20 | select * from ORDERS_SUMMARY; 21 | -------------------------------------------------------------------------------- /Chapter_11/Orders_2023-09-01.json: -------------------------------------------------------------------------------- 1 | { 2 | "Customer": "Park Inn", 3 | "Order date": "2023-09-01", 4 | "Orders": [ 5 | { 6 | "Delivery date": "2023-09-04", 7 | "Orders by day": [ 8 | { 9 | "Baked good type": "English Muffin", 10 | "Quantity": 30 11 | }, 12 | { 13 | "Baked good type": 
"Whole Wheat Loaf", 14 | "Quantity": 6 15 | }, 16 | { 17 | "Baked good type": "White Loaf", 18 | "Quantity": 4 19 | }, 20 | { 21 | "Baked good type": "Bagel", 22 | "Quantity": 25 23 | }, 24 | { 25 | "Baked good type": "Croissant", 26 | "Quantity": 36 27 | } 28 | ] 29 | } 30 | ] 31 | } -------------------------------------------------------------------------------- /Chapter_11/Orders_2023-09-04.json: -------------------------------------------------------------------------------- 1 | { 2 | "Customer": "Park Inn", 3 | "Order date": "2023-09-04", 4 | "Orders": [ 5 | { 6 | "Delivery date": "2023-09-05", 7 | "Orders by day": [ 8 | { 9 | "Baked good type": "English Muffin", 10 | "Quantity": 26 11 | }, 12 | { 13 | "Baked good type": "Whole Wheat Loaf", 14 | "Quantity": 4 15 | }, 16 | { 17 | "Baked good type": "Bagel", 18 | "Quantity": 22 19 | }, 20 | { 21 | "Baked good type": "Croissant", 22 | "Quantity": 30 23 | } 24 | ] 25 | }, 26 | { 27 | "Delivery date": "2023-09-06", 28 | "Orders by day": [ 29 | { 30 | "Baked good type": "English Muffin", 31 | "Quantity": 12 32 | }, 33 | { 34 | "Baked good type": "Whole Wheat Loaf", 35 | "Quantity": 5 36 | }, 37 | { 38 | "Baked good type": "Blueberry Muffin", 39 | "Quantity": 12 40 | }, 41 | { 42 | "Baked good type": "Croissant", 43 | "Quantity": 25 44 | } 45 | ] 46 | } 47 | ] 48 | } -------------------------------------------------------------------------------- /Chapter_12/Chapter_12_Part1_stream_orders.sql: -------------------------------------------------------------------------------- 1 | -- this chapter is a continuation of Chapter 10 2 | -- all scripts in Chapter 10 must be executed before continuing 3 | 4 | use role DATA_ENGINEER; 5 | use warehouse BAKERY_WH; 6 | use database BAKERY_DB; 7 | use schema EXT; 8 | 9 | -- delete all files from the object storage location used in the JSON_ORDERS_STAGE stage 10 | -- upload the json file Orders_2023-09-05.json to the object storage location 11 | 12 | -- recreate the table to remove any data from previous exercises 13 | create or replace table JSON_ORDERS_EXT ( 14 | customer_orders variant, 15 | source_file_name varchar, 16 | load_ts timestamp 17 | ); 18 | 19 | -- create a stream on the table 20 | create stream JSON_ORDERS_STREAM 21 | on table JSON_ORDERS_EXT; 22 | 23 | -- view data in the stream 24 | select * from JSON_ORDERS_STREAM; 25 | -- the stream should be empty 26 | 27 | -- view files in the stage 28 | list @JSON_ORDERS_STAGE; 29 | 30 | -- copy data from the stage into the JSON_ORDERS_EXT table 31 | copy into JSON_ORDERS_EXT 32 | from ( 33 | select 34 | $1, 35 | metadata$filename, 36 | current_timestamp() 37 | from @JSON_ORDERS_STAGE 38 | ) 39 | on_error = abort_statement 40 | ; 41 | -- the output from the copy command should indicate that data from the Orders_2023-09-05.json file was copied into the table 42 | 43 | -- check the data in the stream again 44 | select * from JSON_ORDERS_STREAM; 45 | -- the stream should contain the newly uploaded file 46 | 47 | 48 | -- create a staging table in the STG schema that will store the flattened semi-structured data from the extraction layer 49 | create table STG.JSON_ORDERS_TBL_STG ( 50 | customer varchar, 51 | order_date date, 52 | delivery_date date, 53 | baked_good_type varchar, 54 | quantity number, 55 | source_file_name varchar, 56 | load_ts timestamp 57 | ); 58 | 59 | -- insert the flattened data from the stream into the staging table 60 | insert into STG.JSON_ORDERS_TBL_STG 61 | select 62 | customer_orders:"Customer"::varchar as customer, 63 | 
customer_orders:"Order date"::date as order_date, 64 | CO.value:"Delivery date"::date as delivery_date, 65 | DO.value:"Baked good type":: varchar as baked_good_type, 66 | DO.value:"Quantity"::number as quantity, 67 | source_file_name, 68 | load_ts 69 | from EXT.JSON_ORDERS_STREAM, 70 | lateral flatten (input => customer_orders:"Orders") CO, 71 | lateral flatten (input => CO.value:"Orders by day") DO; 72 | 73 | -- check the data in the table: 74 | select * from STG.JSON_ORDERS_TBL_STG; 75 | -- should show 8 rows 76 | 77 | -- check the data in the stream again 78 | select * from JSON_ORDERS_STREAM; 79 | -- the stream should now be empty because it was consumed by the insert statement 80 | 81 | -- repeat with another file 82 | -- upload the json file Orders_2023-09-06.json to the object storage location 83 | 84 | -- view files in the stage 85 | list @JSON_ORDERS_STAGE; 86 | 87 | -- copy data from the stage into the JSON_ORDERS_EXT table 88 | copy into JSON_ORDERS_EXT 89 | from ( 90 | select 91 | $1, 92 | metadata$filename, 93 | current_timestamp() 94 | from @JSON_ORDERS_STAGE 95 | ) 96 | on_error = abort_statement 97 | ; 98 | -- the output from the copy command should indicate that data from the Orders_2023-09-06.json file was copied into the table 99 | 100 | -- check the data in the stream again 101 | select * from JSON_ORDERS_STREAM; 102 | -- the stream should contain the newly uploaded file 103 | 104 | -- perform the insert statement again 105 | insert into STG.JSON_ORDERS_TBL_STG 106 | select 107 | customer_orders:"Customer"::varchar as customer, 108 | customer_orders:"Order date"::date as order_date, 109 | CO.value:"Delivery date"::date as delivery_date, 110 | DO.value:"Baked good type":: varchar as baked_good_type, 111 | DO.value:"Quantity"::number as quantity, 112 | source_file_name, 113 | load_ts 114 | from EXT.JSON_ORDERS_STREAM, 115 | lateral flatten (input => customer_orders:"Orders") CO, 116 | lateral flatten (input => CO.value:"Orders by day") DO; 117 | 118 | -- should insert 4 rows 119 | -------------------------------------------------------------------------------- /Chapter_12/Chapter_12_Part2_stream_PRODUCT.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema DWH; 5 | 6 | -- create a table in the data warehouse layer and populate initially with the data from the staging layer 7 | create table PRODUCT_TBL as select * from STG.PRODUCT; 8 | select * from PRODUCT_TBL; 9 | 10 | -- create a stream on the table in the staging layer 11 | use schema STG; 12 | create stream PRODUCT_STREAM on table PRODUCT; 13 | 14 | -- make some changes in the staging table: one update and one insert 15 | update PRODUCT 16 | set category = 'Pastry', valid_from = '2023-08-08' 17 | where product_id = 3; 18 | 19 | insert into PRODUCT values 20 | (13, 'Sourdough Bread', 'Bread', 1, 3.6, '2023-08-08'); 21 | 22 | -- view the contents of the stream 23 | select * from PRODUCT_STREAM; 24 | 25 | -- consume the stream by inserting into the target table 26 | insert into DWH.PRODUCT_TBL 27 | select product_id, product_name, category, min_quantity, price, valid_from 28 | from PRODUCT_STREAM 29 | where METADATA$ACTION = 'INSERT'; 30 | 31 | -- check that the stream is now empty 32 | select * from PRODUCT_STREAM; 33 | 34 | -- view data in the target table 35 | select * from DWH.PRODUCT_TBL; 36 | 37 | -- create a view in the data warehouse layer that calculates the end timestamp of the 
validity interval 38 | create view DWH.PRODUCT_VALID_TS as 39 | select 40 | product_id, 41 | product_name, 42 | category, 43 | min_quantity, 44 | price, 45 | valid_from, 46 | NVL( 47 | LEAD(valid_from) over (partition by product_id order by valid_from), 48 | '9999-12-31' 49 | ) as valid_to 50 | from DWH.PRODUCT_TBL 51 | order by product_id; 52 | 53 | select * from DWH.PRODUCT_VALID_TS; 54 | -------------------------------------------------------------------------------- /Chapter_12/Chapter_12_Part3_stream_PARTNER.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema DWH; 5 | 6 | -- create a table in the data warehouse layer and populate initially with the data from the staging layer 7 | create table PARTNER_TBL as select * from STG.PARTNER; 8 | select * from PARTNER_TBL; 9 | 10 | -- create a stream on the table in the staging layer 11 | use schema STG; 12 | create stream PARTNER_STREAM on table PARTNER; 13 | 14 | -- make some changes in the staging table: one update 15 | update PARTNER 16 | set rating = 'A', valid_from = '2023-08-08' 17 | where partner_id = 103; 18 | 19 | -- view the contents of the stream 20 | select * from PARTNER_STREAM; 21 | 22 | -- consume the stream by inserting into the target table 23 | insert into DWH.PARTNER_TBL 24 | select partner_id, partner_name, address, rating, valid_from 25 | from PARTNER_STREAM 26 | where METADATA$ACTION = 'INSERT'; 27 | 28 | -- check that the stream is now empty 29 | select * from PARTNER_STREAM; 30 | 31 | -- view data in the target table 32 | select * from DWH.PARTNER_TBL; 33 | 34 | -- create a view in the data warehouse layer that calculates the end timestamp of the validity interval 35 | create view DWH.PARTNER_VALID_TS as 36 | select 37 | partner_id, 38 | partner_name, 39 | address, 40 | rating, 41 | valid_from, 42 | NVL( 43 | LEAD(valid_from) over (partition by partner_id order by valid_from), 44 | '9999-12-31' 45 | ) as valid_to 46 | from DWH.PARTNER_TBL 47 | order by partner_id; 48 | 49 | select * from DWH.PARTNER_VALID_TS; 50 | -------------------------------------------------------------------------------- /Chapter_12/Chapter_12_Part4_dynamic_table.sql: -------------------------------------------------------------------------------- 1 | -- normalize data in the data warehouse layer 2 | use role DATA_ENGINEER; 3 | use warehouse BAKERY_WH; 4 | use database BAKERY_DB; 5 | use schema DWH; 6 | 7 | -- construct a query that adds primary keys from the PARTNER and PRODUCT tables 8 | -- Listing 12.1. 9 | select PT.partner_id, PRD.product_id, ORD.delivery_date, 10 | ORD.order_date, ORD.quantity 11 | from STG.JSON_ORDERS_TBL_STG ORD 12 | inner join STG.PARTNER PT 13 | on PT.partner_name = ORD.customer 14 | inner join STG.PRODUCT PRD 15 | on PRD.product_name = ORD.baked_good_type; 16 | 17 | -- create a dynamic table ORDERS_TBL in the DWH schema using the previous query 18 | create dynamic table ORDERS_TBL 19 | target_lag = '1 minute' 20 | warehouse = BAKERY_WH 21 | as 22 | select PT.partner_id, PRD.product_id, ORD.delivery_date, 23 | ORD.order_date, ORD.quantity 24 | from STG.JSON_ORDERS_TBL_STG ORD 25 | inner join STG.PARTNER PT 26 | on PT.partner_name = ORD.customer 27 | inner join STG.PRODUCT PRD 28 | on PRD.product_name = ORD.baked_good_type; 29 | 30 | select * from ORDERS_TBL; 31 | 32 | -- summarize data for reporting 33 | -- Listing 12.2. 
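-- optional check (a sketch, not part of the chapter scripts): because ORDERS_TBL is a dynamic table
-- with a 1-minute target lag, newly staged rows appear only after the next automatic refresh;
-- the refresh activity can be inspected with the DYNAMIC_TABLE_REFRESH_HISTORY table function
select name, state, refresh_start_time, refresh_end_time
from table(information_schema.dynamic_table_refresh_history())
where name = 'ORDERS_TBL'
order by refresh_start_time desc;

-- Listing 12.2 (below) then summarizes the normalized orders by delivery date and product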
34 | select ORD.delivery_date, PRD.product_name, PRD.category, 35 | sum(ORD.quantity) as total_quantity 36 | from dwh.ORDERS_TBL ORD 37 | left join dwh.PRODUCT_TBL PRD 38 | on ORD.product_id = PRD.product_id 39 | group by all; 40 | 41 | -- select products that are valid currently 42 | select * from DWH.PRODUCT_VALID_TS 43 | where valid_to = '9999-12-31'; 44 | 45 | -- select products that were valid on August 1, 2023 46 | select * from DWH.PRODUCT_VALID_TS 47 | where valid_from <= '2023-08-01' and valid_to > '2023-08-01'; 48 | 49 | -- summarize data for reporting by taking the product category that is valid currently 50 | -- Listing 12.3. 51 | select ORD.delivery_date, PRD.product_name, PRD.category, 52 | sum(ORD.quantity) as total_quantity 53 | from dwh.ORDERS_TBL ORD 54 | left join (select * from dwh.PRODUCT_VALID_TS where valid_to = '9999-12-31') PRD 55 | on ORD.product_id = PRD.product_id 56 | group by all; 57 | 58 | use schema MGMT; 59 | create dynamic table ORDERS_SUMMARY_TBL 60 | target_lag = '1 minute' 61 | warehouse = BAKERY_WH 62 | as 63 | select ORD.delivery_date, PRD.product_name, PRD.category, 64 | sum(ORD.quantity) as total_quantity 65 | from dwh.ORDERS_TBL ORD 66 | left join (select * from dwh.PRODUCT_VALID_TS where valid_to = '9999-12-31') PRD 67 | on ORD.product_id = PRD.product_id 68 | group by all; 69 | 70 | select * from ORDERS_SUMMARY_TBL; 71 | -------------------------------------------------------------------------------- /Chapter_12/Orders_2023-09-05.json: -------------------------------------------------------------------------------- 1 | { 2 | "Customer": "Park Inn", 3 | "Order date": "2023-09-05", 4 | "Orders": [ 5 | { 6 | "Delivery date": "2023-09-06", 7 | "Orders by day": [ 8 | { 9 | "Baked good type": "English Muffin", 10 | "Quantity": 12 11 | }, 12 | { 13 | "Baked good type": "Whole Wheat Loaf", 14 | "Quantity": 6 15 | }, 16 | { 17 | "Baked good type": "Blueberry Muffin", 18 | "Quantity": 12 19 | }, 20 | { 21 | "Baked good type": "Croissant", 22 | "Quantity": 20 23 | } 24 | ] 25 | }, 26 | { 27 | "Delivery date": "2023-09-07", 28 | "Orders by day": [ 29 | { 30 | "Baked good type": "English Muffin", 31 | "Quantity": 15 32 | }, 33 | { 34 | "Baked good type": "Whole Wheat Loaf", 35 | "Quantity": 4 36 | }, 37 | { 38 | "Baked good type": "Bagel", 39 | "Quantity": 20 40 | }, 41 | { 42 | "Baked good type": "Croissant", 43 | "Quantity": 25 44 | } 45 | ] 46 | } 47 | ] 48 | } -------------------------------------------------------------------------------- /Chapter_12/Orders_2023-09-06.json: -------------------------------------------------------------------------------- 1 | { 2 | "Customer": "Park Inn", 3 | "Order date": "2023-09-06", 4 | "Orders": [ 5 | { 6 | "Delivery date": "2023-09-08", 7 | "Orders by day": [ 8 | { 9 | "Baked good type": "English Muffin", 10 | "Quantity": 15 11 | }, 12 | { 13 | "Baked good type": "Whole Wheat Loaf", 14 | "Quantity": 4 15 | }, 16 | { 17 | "Baked good type": "Blueberry Muffin", 18 | "Quantity": 12 19 | }, 20 | { 21 | "Baked good type": "Croissant", 22 | "Quantity": 25 23 | } 24 | ] 25 | } 26 | ] 27 | } -------------------------------------------------------------------------------- /Chapter_13/Chapter_13_Part1_create_schema.sql: -------------------------------------------------------------------------------- 1 | -- refer to Chapter_10_Part1_role_based_access_control.sql 2 | 3 | use role SYSADMIN; 4 | use database BAKERY_DB; 5 | 6 | -- create schema with managed access using the SYSADMIN role 7 | create schema ORCHESTRATION with managed 
access; 8 | 9 | -- grant full privileges on the ORCHESTRATION schema to the BAKERY_FULL role using the SECURITYADMIN role 10 | use role SECURITYADMIN; 11 | grant all on schema BAKERY_DB.ORCHESTRATION to role BAKERY_FULL; 12 | -------------------------------------------------------------------------------- /Chapter_13/Chapter_13_Part2_create_tasks_orders.sql: -------------------------------------------------------------------------------- 1 | -- this chapter is a continuation of Chapter 11 2 | -- all scripts in Chapter 11 must be executed before continuing 3 | 4 | -- use the ACCOUNTADMIN role to grant the execute task privilege to the DATA_ENGINEER role 5 | use role ACCOUNTADMIN; 6 | grant execute task on account to role DATA_ENGINEER; 7 | 8 | -- continue working with the DATA_ENGINEER role 9 | use role DATA_ENGINEER; 10 | use warehouse BAKERY_WH; 11 | use database BAKERY_DB; 12 | use schema ORCHESTRATION; 13 | 14 | -- create a task that performs the COPY INTO operation from the stage into the table 15 | create or replace task COPY_ORDERS_TASK 16 | warehouse = BAKERY_WH 17 | schedule = '10 M' 18 | as 19 | copy into EXT.JSON_ORDERS_EXT 20 | from ( 21 | select 22 | $1, 23 | metadata$filename, 24 | current_timestamp() 25 | from @EXT.JSON_ORDERS_STAGE 26 | ) 27 | on_error = abort_statement; 28 | 29 | -- execute the task once to verify that it is working 30 | execute task COPY_ORDERS_TASK; 31 | 32 | -- view the task history 33 | -- Listing 13.1 34 | select * 35 | from table(information_schema.task_history()) 36 | order by scheduled_time desc; 37 | 38 | -- create a task that inserts data from the stream into the staging table 39 | create or replace task INSERT_ORDERS_STG_TASK 40 | warehouse = 'BAKERY_WH' 41 | after COPY_ORDERS_TASK 42 | when 43 | system$stream_has_data('EXT.JSON_ORDERS_STREAM') 44 | as 45 | insert into STG.JSON_ORDERS_TBL_STG 46 | select 47 | customer_orders:"Customer"::varchar as customer, 48 | customer_orders:"Order date"::date as order_date, 49 | CO.value:"Delivery date"::date as delivery_date, 50 | DO.value:"Baked good type":: varchar as baked_good_type, 51 | DO.value:"Quantity"::number as quantity, 52 | source_file_name, 53 | load_ts 54 | from EXT.JSON_ORDERS_STREAM, 55 | lateral flatten (input => customer_orders:"Orders") CO, 56 | lateral flatten (input => CO.value:"Orders by day") DO; 57 | 58 | -- if you wish to test the task, remove the dependency on the COPY_ORDERS_TASK, execute the task, then add the dependency again 59 | -- remove the dependency 60 | alter task INSERT_ORDERS_STG_TASK remove after COPY_ORDERS_TASK; 61 | -- execute the task manually 62 | execute task INSERT_ORDERS_STG_TASK; 63 | -- view the task history 64 | select * 65 | from table(information_schema.task_history()) 66 | order by scheduled_time desc; 67 | -- add the dependency again 68 | alter task INSERT_ORDERS_STG_TASK add after COPY_ORDERS_TASK; 69 | -- execute the task once to verify that it is working 70 | 71 | -- enable the child and parent tasks 72 | alter task INSERT_ORDERS_STG_TASK resume; 73 | alter task COPY_ORDERS_TASK resume; 74 | 75 | -- upload the json file Orders_2023-09-07.json to the object storage location 76 | -- wait until the tasks execute on schedule 77 | 78 | -- view the task history 79 | select * 80 | from table(information_schema.task_history()) 81 | order by scheduled_time desc; 82 | 83 | -- suspend the task when you are done testing 84 | alter task COPY_ORDERS_TASK suspend; 85 | -------------------------------------------------------------------------------- 
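Two optional checks can help while testing the tasks above (a sketch, assuming the tasks live in BAKERY_DB.ORCHESTRATION as created earlier; these statements are not part of the book's scripts). The TASK_DEPENDENTS table function lists the tasks wired to run after COPY_ORDERS_TASK, and the task schedule can also be written as a CRON expression when a fixed daily run time is preferred over the 10-minute interval used for testing.

-- list COPY_ORDERS_TASK together with all tasks scheduled to run after it
select name, state, predecessors
from table(information_schema.task_dependents(
    task_name => 'BAKERY_DB.ORCHESTRATION.COPY_ORDERS_TASK',
    recursive => true));

-- alternative scheduling: run the task once a day at 07:00 UTC instead of every 10 minutes
-- (a task must be suspended before its schedule is changed)
alter task COPY_ORDERS_TASK suspend;
alter task COPY_ORDERS_TASK set schedule = 'USING CRON 0 7 * * * UTC';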
/Chapter_13/Chapter_13_Part3_send_email.sql: -------------------------------------------------------------------------------- 1 | -- create a notification integration using the ACCOUNTADMIN role 2 | use role ACCOUNTADMIN; 3 | create notification integration PIPELINE_EMAIL_INT 4 | type = email 5 | enabled = true; 6 | 7 | -- grant usage on the integration to the DATA_ENGINEER role 8 | grant usage on integration PIPELINE_EMAIL_INT to role DATA_ENGINEER; 9 | 10 | -- use the DATA_ENGINEER role to send an email to yourself 11 | use role DATA_ENGINEER; 12 | call SYSTEM$SEND_EMAIL( 13 | 'PIPELINE_EMAIL_INT', 14 | 'firstname.lastname@youremail.com', -- substitute you email address 15 | 'The subject of the email from Snowflake', 16 | 'This is the body of the email.' 17 | ); 18 | -------------------------------------------------------------------------------- /Chapter_13/Chapter_13_Part4_create_task_graph.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema ORCHESTRATION; 5 | 6 | -- create the root task 7 | create or replace task PIPELINE_START_TASK 8 | warehouse = BAKERY_WH 9 | schedule = '10 M' 10 | as 11 | call SYSTEM$SEND_EMAIL( 12 | 'PIPELINE_EMAIL_INT', 13 | 'firstname.lastname@youremail.com', -- substitute you email address 14 | 'Daily pipeline start', 15 | 'The daily pipeline started at ' || current_timestamp || '.' 16 | ); 17 | 18 | -- create a task that inserts the product data from the stream to the target table 19 | create or replace task INSERT_PRODUCT_TASK 20 | warehouse = BAKERY_WH 21 | after PIPELINE_START_TASK 22 | when 23 | system$stream_has_data('STG.PRODUCT_STREAM') 24 | as 25 | insert into DWH.PRODUCT_TBL 26 | select product_id, product_name, category, 27 | min_quantity, price, valid_from 28 | from STG.PRODUCT_STREAM 29 | where METADATA$ACTION = 'INSERT'; 30 | 31 | -- create a task that inserts the partner data from the stream to the target table 32 | create or replace task INSERT_PARTNER_TASK 33 | warehouse = BAKERY_WH 34 | after PIPELINE_START_TASK 35 | when 36 | system$stream_has_data('STG.PARTNER_STREAM') 37 | as 38 | insert into DWH.PARTNER_TBL 39 | select partner_id, partner_name, address, rating, valid_from 40 | from PARTNER_STREAM 41 | where METADATA$ACTION = 'INSERT'; 42 | 43 | -- create the finalizer task 44 | create task PIPELINE_END_TASK 45 | warehouse = BAKERY_WH 46 | finalize = PIPELINE_START_TASK 47 | as 48 | call SYSTEM$SEND_EMAIL( 49 | 'PIPELINE_EMAIL_INT', 50 | 'firstname.lastname@youremail.com', -- substitute you email address 51 | 'Daily pipeline end', 52 | 'The daily pipeline finished at ' || current_timestamp || '.' 
53 | ); 54 | 55 | -- modify the COPY_ORDERS_TASK to remove the schedule and to run after the PIPELINE_START_TASK 56 | alter task COPY_ORDERS_TASK suspend; 57 | alter task COPY_ORDERS_TASK unset schedule; 58 | alter task COPY_ORDERS_TASK 59 | add after PIPELINE_START_TASK; 60 | 61 | -- resume all tasks 62 | alter task PIPELINE_END_TASK resume; 63 | alter task INSERT_PRODUCT_TASK resume; 64 | alter task INSERT_PARTNER_TASK resume; 65 | alter task INSERT_ORDERS_STG_TASK resume; 66 | alter task COPY_ORDERS_TASK resume; 67 | alter task PIPELINE_START_TASK resume; 68 | 69 | -- wait 10 minutes (or execute the task graph manually), then view the task history 70 | select * 71 | from table(information_schema.task_history()) 72 | order by scheduled_time desc; 73 | 74 | -- suspend the pipeline so it doesn't continue to consume resources and send emails 75 | alter task PIPELINE_START_TASK suspend; 76 | -------------------------------------------------------------------------------- /Chapter_13/Chapter_13_Part5_monitoring.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema ORCHESTRATION; 5 | 6 | -- create a table to store logging information 7 | create or replace table PIPELINE_LOG ( 8 | run_group_id varchar, --CURRENT_TASK_GRAPH_RUN_GROUP_ID 9 | root_task_name varchar, --CURRENT_ROOT_TASK_NAME 10 | task_name varchar, --CURRENT_TASK_NAME 11 | log_ts timestamp, 12 | rows_processed number 13 | ); 14 | 15 | -- recreate the COPY_ORDERS_TASK with these changes: 16 | -- - set a schedule so that we can test it (we will modify to unset the schedule and add a dependency on the root task later) 17 | -- - enclose the body of the task with the BEGIN and END keywords 18 | -- - add an INSERT statement to insert data into the logging table 19 | create or replace task COPY_ORDERS_TASK 20 | warehouse = BAKERY_WH 21 | schedule = '10 M' 22 | as 23 | begin 24 | copy into EXT.JSON_ORDERS_EXT 25 | from ( 26 | select 27 | $1, 28 | metadata$filename, 29 | current_timestamp() 30 | from @EXT.JSON_ORDERS_STAGE 31 | ) 32 | on_error = abort_statement; 33 | 34 | insert into PIPELINE_LOG 35 | select 36 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_GRAPH_RUN_GROUP_ID'), 37 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_ROOT_TASK_NAME'), 38 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_NAME'), 39 | current_timestamp(), 40 | :SQLROWCOUNT; 41 | end; 42 | 43 | -- execute the task manually 44 | execute task COPY_ORDERS_TASK; 45 | 46 | -- check the TASK_HISTORY() 47 | select * 48 | from table(information_schema.task_history()) 49 | order by scheduled_time desc; 50 | 51 | -- verify that data was inserted into the logging table 52 | select * from PIPELINE_LOG; 53 | 54 | -- alter the task to unset the schedule and add a dependency on the root task 55 | alter task PIPELINE_START_TASK suspend; 56 | alter task COPY_ORDERS_TASK unset schedule; 57 | alter task COPY_ORDERS_TASK 58 | add after PIPELINE_START_TASK; 59 | 60 | -- recreate the INSERT_ORDERS_STG_TASK and insert data into the logging table 61 | create or replace task INSERT_ORDERS_STG_TASK 62 | warehouse = 'BAKERY_WH' 63 | after COPY_ORDERS_TASK 64 | when 65 | system$stream_has_data('EXT.JSON_ORDERS_STREAM') 66 | as 67 | begin 68 | insert into STG.JSON_ORDERS_TBL_STG 69 | select 70 | customer_orders:"Customer"::varchar as customer, 71 | customer_orders:"Order date"::date as order_date, 72 | CO.value:"Delivery date"::date as delivery_date, 73 | DO.value:"Baked good type":: 
varchar as baked_good_type, 74 | DO.value:"Quantity"::number as quantity, 75 | source_file_name, 76 | load_ts 77 | from EXT.JSON_ORDERS_STREAM, 78 | lateral flatten (input => customer_orders:"Orders") CO, 79 | lateral flatten (input => CO.value:"Orders by day") DO; 80 | 81 | insert into PIPELINE_LOG 82 | select 83 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_GRAPH_RUN_GROUP_ID'), 84 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_ROOT_TASK_NAME'), 85 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_NAME'), 86 | current_timestamp(), 87 | :SQLROWCOUNT; 88 | end; 89 | 90 | -- recreate the INSERT_PRODUCT_TASK and insert data into the logging table 91 | create or replace task INSERT_PRODUCT_TASK 92 | warehouse = BAKERY_WH 93 | after PIPELINE_START_TASK 94 | when 95 | system$stream_has_data('STG.PRODUCT_STREAM') 96 | as 97 | begin 98 | insert into DWH.PRODUCT_TBL 99 | select product_id, product_name, category, 100 | min_quantity, price, valid_from 101 | from STG.PRODUCT_STREAM 102 | where METADATA$ACTION = 'INSERT'; 103 | 104 | insert into PIPELINE_LOG 105 | select 106 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_GRAPH_RUN_GROUP_ID'), 107 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_ROOT_TASK_NAME'), 108 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_NAME'), 109 | current_timestamp(), 110 | :SQLROWCOUNT; 111 | end; 112 | 113 | -- recreate the INSERT_PARTNER_TASK and insert data into the logging table 114 | create or replace task INSERT_PARTNER_TASK 115 | warehouse = BAKERY_WH 116 | after PIPELINE_START_TASK 117 | when 118 | system$stream_has_data('STG.PARTNER_STREAM') 119 | as 120 | begin 121 | insert into DWH.PARTNER_TBL 122 | select partner_id, partner_name, address, rating, valid_from 123 | from STG.PARTNER_STREAM 124 | where METADATA$ACTION = 'INSERT'; 125 | 126 | insert into PIPELINE_LOG 127 | select 128 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_GRAPH_RUN_GROUP_ID'), 129 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_ROOT_TASK_NAME'), 130 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_NAME'), 131 | current_timestamp(), 132 | :SQLROWCOUNT; 133 | end; 134 | 135 | -- recreate the finalizer task by constructing a return_message string with the logging information from all tasks in the current run 136 | create or replace task PIPELINE_END_TASK 137 | warehouse = BAKERY_WH 138 | finalize = PIPELINE_START_TASK 139 | as 140 | declare 141 | return_message varchar := ''; 142 | begin 143 | let log_cur cursor for 144 | select task_name, rows_processed 145 | from PIPELINE_LOG 146 | where run_group_id = 147 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_GRAPH_RUN_GROUP_ID'); 148 | 149 | for log_rec in log_cur loop 150 | return_message := return_message || 151 | 'Task: '|| log_rec.task_name || 152 | ' Rows processed: ' || log_rec.rows_processed || '\n'; 153 | end loop; 154 | 155 | call SYSTEM$SEND_EMAIL( 156 | 'PIPELINE_EMAIL_INT', 157 | 'firstname.lastname@youremail.com', 158 | 'Daily pipeline end', 159 | 'The daily pipeline finished at ' || current_timestamp || '.' 
|| 160 | '\n\n' || :return_message 161 | 162 | ); 163 | end; 164 | 165 | -- add data to the sources 166 | -- upload the Orders_2023-09-08.json file to the cloud storage location 167 | -- insert partner data 168 | insert into STG.PARTNER values( 169 | 113, 'Lazy Brunch', '1012 Astoria Avenue', 'A', '2023-09-01' 170 | ); 171 | -- update product data 172 | update STG.PRODUCT set min_quantity = 5 where product_id = 5; 173 | 174 | -- resume all tasks 175 | alter task PIPELINE_END_TASK resume; 176 | alter task INSERT_PRODUCT_TASK resume; 177 | alter task INSERT_PARTNER_TASK resume; 178 | alter task INSERT_ORDERS_STG_TASK resume; 179 | alter task COPY_ORDERS_TASK resume; 180 | alter task PIPELINE_START_TASK resume; 181 | 182 | -- execute the root task manually 183 | execute task PIPELINE_START_TASK; 184 | 185 | -- check the TASK_HISTORY() 186 | select * 187 | from table(information_schema.task_history()) 188 | order by scheduled_time desc; 189 | 190 | -- view data in the logging table 191 | select * from PIPELINE_LOG order by log_ts desc; 192 | 193 | -- suspend the pipeline so it doesn't continue to consume resources and send emails 194 | alter task PIPELINE_START_TASK suspend; -------------------------------------------------------------------------------- /Chapter_13/Orders_2023-09-07.json: -------------------------------------------------------------------------------- 1 | { 2 | "Customer": "Park Inn", 3 | "Order date": "2023-09-07", 4 | "Orders": [ 5 | { 6 | "Delivery date": "2023-09-11", 7 | "Orders by day": [ 8 | { 9 | "Baked good type": "English Muffin", 10 | "Quantity": 12 11 | }, 12 | { 13 | "Baked good type": "Whole Wheat Loaf", 14 | "Quantity": 5 15 | }, 16 | { 17 | "Baked good type": "White Loaf", 18 | "Quantity": 6 19 | }, 20 | { 21 | "Baked good type": "Croissant", 22 | "Quantity": 24 23 | } 24 | ] 25 | } 26 | ] 27 | } -------------------------------------------------------------------------------- /Chapter_13/Orders_2023-09-08.json: -------------------------------------------------------------------------------- 1 | { 2 | "Customer": "Park Inn", 3 | "Order date": "2023-09-08", 4 | "Orders": [ 5 | { 6 | "Delivery date": "2023-09-12", 7 | "Orders by day": [ 8 | { 9 | "Baked good type": "Blueberry Muffin", 10 | "Quantity": 6 11 | }, 12 | { 13 | "Baked good type": "Whole Wheat Loaf", 14 | "Quantity": 2 15 | }, 16 | { 17 | "Baked good type": "White Loaf", 18 | "Quantity": 4 19 | }, 20 | { 21 | "Baked good type": "Croissant", 22 | "Quantity": 16 23 | } 24 | ] 25 | } 26 | ] 27 | } -------------------------------------------------------------------------------- /Chapter_14/Chapter_14_Part1_data_quality_task_PARTNER.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema STG; 5 | 6 | -- insert a new partner into the PARTNER staging table 7 | insert into STG.PARTNER values 8 | (114, 'Country Market', '12 Meadow Lane', null, '2023-10-10'); 9 | 10 | -- insert a new product into the PRODUCT staging table 11 | insert into STG.PRODUCT values 12 | (14, 'Banana Muffin', 'Cake', 12, 3.20, '2023-10-10'); 13 | 14 | -- execute the pipeline manually 15 | execute task ORCHESTRATION.PIPELINE_START_TASK; 16 | 17 | -- check the task_history() table function 18 | select * 19 | from table(information_schema.task_history()) 20 | order by scheduled_time desc; 21 | -- you should also receive two emails, one when the pipeline started and one when the pipeline completed 22 | 23 | -- 
create DQ schema 24 | -- refer to Chapter_10_Part1_role_based_access_control.sql 25 | 26 | use role SYSADMIN; 27 | use database BAKERY_DB; 28 | 29 | -- create schema with managed access using the SYSADMIN role 30 | create schema DQ with managed access; 31 | 32 | -- grant full privileges on the DQ schema to the BAKERY_FULL role using the SECURITYADMIN role 33 | use role SECURITYADMIN; 34 | grant all on schema BAKERY_DB.DQ to role BAKERY_FULL; 35 | 36 | use role DATA_ENGINEER; 37 | use schema DQ; 38 | -- create a table to store data quality information 39 | create or replace table DQ_LOG ( 40 | run_group_id varchar, --CURRENT_TASK_GRAPH_RUN_GROUP_ID 41 | root_task_name varchar, --CURRENT_ROOT_TASK_NAME 42 | task_name varchar, --CURRENT_TASK_NAME 43 | log_ts timestamp, 44 | database_name varchar, 45 | schema_name varchar, 46 | table_name varchar, 47 | dq_rule_name varchar, 48 | error_cnt number, 49 | error_info variant 50 | ); 51 | 52 | -- go back to the ORCHESTRATION schema to work on the tasks 53 | use schema ORCHESTRATION; 54 | 55 | -- select rows where the rating is null 56 | select * from DWH.PARTNER_TBL where rating is null; 57 | 58 | -- select an array of partner ids of all rows where the rating is null 59 | --Listing 14.1. 60 | select array_agg(PARTNER_ID) from DWH.PARTNER_TBL where rating is null; 61 | 62 | -- create a PARTNER_DQ_TASK 63 | -- schedule it every 10 minutes initially so you can execute it manually to test 64 | create or replace task PARTNER_DQ_TASK 65 | warehouse = BAKERY_WH 66 | schedule = '10 M' 67 | as 68 | declare 69 | error_info variant; 70 | error_cnt integer; 71 | begin 72 | select array_agg(PARTNER_ID) into error_info 73 | from DWH.PARTNER_TBL 74 | where rating is null; 75 | 76 | error_cnt := array_size(error_info); 77 | 78 | if (error_cnt > 0) then 79 | insert into DQ.DQ_LOG 80 | select 81 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_GRAPH_RUN_GROUP_ID'), 82 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_ROOT_TASK_NAME'), 83 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_NAME'), 84 | current_timestamp(), 85 | 'BAKERY_DB', 86 | 'DWH', 87 | 'PARTNER_TBL', 88 | 'Null values in the RATING column', 89 | :error_cnt, 90 | :error_info; 91 | end if; 92 | end; 93 | 94 | -- execute the task manually to test 95 | execute task PARTNER_DQ_TASK; 96 | 97 | -- check the task history 98 | select * 99 | from table(information_schema.task_history()) 100 | order by scheduled_time desc; 101 | 102 | -- view the data inserted into the DQ_LOG table 103 | select * from DQ.DQ_LOG; 104 | 105 | -- unset the schedule from the task and make it dependent on the INSERT_PARTNER_TASK 106 | alter task PARTNER_DQ_TASK unset schedule; 107 | alter task PARTNER_DQ_TASK 108 | add after INSERT_PARTNER_TASK; 109 | 110 | -- resume the task so it will run in the pipeline 111 | alter task PARTNER_DQ_TASK resume; 112 | 113 | -------------------------------------------------------------------------------- /Chapter_14/Chapter_14_Part2_data_quality_task_PRODUCT.sql: -------------------------------------------------------------------------------- 1 | use role DATA_ENGINEER; 2 | use warehouse BAKERY_WH; 3 | use database BAKERY_DB; 4 | use schema ORCHESTRATION; 5 | 6 | -- select rows where the CATEGORY is not one of the allowed values ('Bread', 'Pastry') 7 | --Listing 14.2 8 | select * 9 | from DWH.PRODUCT_TBL 10 | where category not in ('Bread', 'Pastry'); 11 | 12 | -- create the PRODUCT_DQ_TASK 13 | -- schedule it every 10 minutes initially so you can execute it manually to test 14 | create or replace task 
ORCHESTRATION.PRODUCT_DQ_TASK 15 | warehouse = BAKERY_WH 16 | schedule = '10 M' 17 | as 18 | declare 19 | error_cnt integer; 20 | error_info variant; 21 | begin 22 | select array_agg(product_id) into error_info 23 | from BAKERY_DB.DWH.PRODUCT_TBL 24 | where category not in ('Bread', 'Pastry'); 25 | 26 | error_cnt := array_size(error_info); 27 | 28 | if (error_cnt > 0) then 29 | insert into DQ.DQ_LOG 30 | select 31 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_GRAPH_RUN_GROUP_ID'), 32 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_ROOT_TASK_NAME'), 33 | SYSTEM$TASK_RUNTIME_INFO('CURRENT_TASK_NAME'), 34 | current_timestamp(), 35 | 'BAKERY_DB', 36 | 'DWH', 37 | 'PRODUCT_TBL', 38 | 'Invalid values in the CATEGORY column', 39 | :error_cnt, 40 | :error_info; 41 | end if; 42 | end; 43 | 44 | -- execute the task manually to test 45 | execute task PRODUCT_DQ_TASK; 46 | 47 | select * 48 | from table(information_schema.task_history()) 49 | order by scheduled_time desc; 50 | 51 | select * from DQ.DQ_LOG; 52 | 53 | -- unset the schedule from the task and make it dependent on the INSERT_PRODUCT_TASK 54 | alter task PRODUCT_DQ_TASK unset schedule; 55 | alter task PRODUCT_DQ_TASK 56 | add after INSERT_PRODUCT_TASK; 57 | 58 | -- resume the task so it will run in the pipeline 59 | alter task PRODUCT_DQ_TASK resume; 60 | 61 | -- before executing the pipeline, update the rows in the staging table so the streams have data 62 | update STG.PARTNER set valid_from = '2023-10-11' where partner_id = 114; 63 | update STG.PRODUCT set valid_from = '2023-10-11' where product_id = 14; 64 | 65 | -- execute the pipeline manually 66 | execute task ORCHESTRATION.PIPELINE_START_TASK; 67 | 68 | -- you can check the task_history() table function 69 | select * 70 | from table(information_schema.task_history()) 71 | order by scheduled_time desc; 72 | -- you should also receive two emails, one when the pipeline started and one when the pipeline completed 73 | 74 | -- check the DQ_LOG table 75 | select * from DQ.DQ_LOG order by log_ts desc; 76 | -------------------------------------------------------------------------------- /Chapter_14/Chapter_14_Part3_data_metric_functions.sql: -------------------------------------------------------------------------------- 1 | -- grant privileges to work with data metric functions 2 | use role ACCOUNTADMIN; 3 | grant database role SNOWFLAKE.DATA_METRIC_USER to role DATA_ENGINEER; 4 | grant EXECUTE DATA METRIC FUNCTION on account to role DATA_ENGINEER; 5 | grant application role SNOWFLAKE.DATA_QUALITY_MONITORING_VIEWER 6 | to role DATA_ENGINEER; 7 | 8 | -- continue to use the DATA_ENGINEER role 9 | use role DATA_ENGINEER; 10 | 11 | -- add the data metric schedule to the PARTNER_TBL table 12 | alter table DWH.PARTNER_TBL set DATA_METRIC_SCHEDULE = '5 MINUTE'; 13 | 14 | -- add a data metric function to the RATING column 15 | alter table DWH.PARTNER_TBL 16 | add data metric function SNOWFLAKE.CORE.NULL_COUNT 17 | on (rating); 18 | 19 | -- after about 5 minutes, check the output in the DATA_QUALITY_MONITORING_RESULTS table 20 | select measurement_time, table_name, metric_name, argument_names, value 21 | from SNOWFLAKE.LOCAL.DATA_QUALITY_MONITORING_RESULTS 22 | order by measurement_time desc; 23 | 24 | -- create a custom data metric function in the DQ schema 25 | create data metric function DQ.INVALID_CATEGORY( 26 | T table(CAT varchar)) 27 | returns integer 28 | as 29 | $$ 30 | select count(*) 31 | from T 32 | where CAT not in ('Bread', 'Pastry') 33 | $$; 34 | 35 | -- add the data metric schedule to the PRODUCT_TBL 
table 36 | alter table DWH.PRODUCT_TBL set DATA_METRIC_SCHEDULE = '5 MINUTE'; 37 | 38 | alter table DWH.PRODUCT_TBL 39 | add data metric function DQ.INVALID_CATEGORY 40 | on (category); 41 | 42 | -- after about 5 minutes, check the output in the DATA_QUALITY_MONITORING_RESULTS table 43 | select measurement_time, table_name, metric_name, argument_names, value 44 | from SNOWFLAKE.LOCAL.DATA_QUALITY_MONITORING_RESULTS 45 | order by measurement_time desc; 46 | 47 | -- unset the data metric schedule so that it doesn't execute every 5 minutes 48 | alter table DWH.PARTNER_TBL unset DATA_METRIC_SCHEDULE; 49 | alter table DWH.PRODUCT_TBL unset DATA_METRIC_SCHEDULE; 50 | 51 | -- set the data metric schedule to trigger on DML changes 52 | alter table DWH.PARTNER_TBL 53 | set DATA_METRIC_SCHEDULE = 'TRIGGER_ON_CHANGES'; 54 | alter table DWH.PRODUCT_TBL 55 | set DATA_METRIC_SCHEDULE = 'TRIGGER_ON_CHANGES'; 56 | 57 | -- view the data metric schedule on a table 58 | show parameters like 'DATA_METRIC_SCHEDULE' in table DWH.PRODUCT_TBL; 59 | 60 | -- view the data metric functions associated with a table 61 | select metric_name, ref_entity_name, ref_entity_domain, ref_arguments, schedule 62 | from table( 63 | INFORMATION_SCHEMA.DATA_METRIC_FUNCTION_REFERENCES( 64 | ref_entity_name => 'BAKERY_DB.DWH.PRODUCT_TBL', 65 | ref_entity_domain => 'table' 66 | ) 67 | ); 68 | -------------------------------------------------------------------------------- /Chapter_14/Chapter_14_Part4_alert.sql: -------------------------------------------------------------------------------- 1 | -- grant privilege to execute alerts 2 | use role ACCOUNTADMIN; 3 | grant execute alert on account to role DATA_ENGINEER; 4 | 5 | -- use the DATA_ENGINEER role to create an alert in the DQ schema 6 | use role DATA_ENGINEER; 7 | use warehouse BAKERY_WH; 8 | use database BAKERY_DB; 9 | use schema DQ; 10 | 11 | -- construct a query that sums the values reported by the data metric functions on all tables in the DWH schema within the last hour 12 | -- adding a filter that the query returns data only if the sum of the values is greater than 0 13 | -- Listing 14.3. 14 | select sum(value) 15 | from SNOWFLAKE.LOCAL.DATA_QUALITY_MONITORING_RESULTS 16 | where table_database = 'BAKERY_DB' 17 | and table_schema = 'DWH' 18 | and measurement_time > dateadd('hour', -1, current_timestamp()) 19 | having sum(value) > 0; 20 | 21 | -- create an alert that sends an email when the previous query returns data 22 | create alert DATA_QUALITY_MONITORING_ALERT 23 | warehouse = BAKERY_WH 24 | schedule = '5 minute' 25 | if (exists( 26 | select sum(value) 27 | from SNOWFLAKE.LOCAL.DATA_QUALITY_MONITORING_RESULTS 28 | where table_database = 'BAKERY_DB' 29 | and table_schema = 'DWH' 30 | and measurement_time > dateadd('hour', -1, current_timestamp()) 31 | having sum(value) > 0 32 | )) 33 | then 34 | call SYSTEM$SEND_EMAIL( 35 | 'PIPELINE_EMAIL_INT', 36 | 'firstname.lastname@youremail.com', -- substitute you email address 37 | 'Data quality monitoring alert', 38 | 'Data metric functions reported invalid values since ' || to_char(dateadd('hour', -1, current_timestamp()), 'YYYY-MM-DD HH24:MI:SS') || '.' 
39 | ); 40 | 41 | -- resume the alert 42 | alter alert DATA_QUALITY_MONITORING_ALERT resume; 43 | 44 | -- check the execution status of the alert 45 | select * from table(information_schema.alert_history()) 46 | order by scheduled_time desc; 47 | 48 | -- suspend the alert 49 | alter alert DATA_QUALITY_MONITORING_ALERT suspend; 50 | -- change the schedule to execute every hour 51 | alter alert DATA_QUALITY_MONITORING_ALERT set schedule = '60 minute'; 52 | -------------------------------------------------------------------------------- /Chapter_14/Chapter_14_Part5_anomaly_detection.sql: -------------------------------------------------------------------------------- 1 | -- use the DATA_ENGINEER role to generate sample data in the STG schema 2 | use role DATA_ENGINEER; 3 | use warehouse BAKERY_WH; 4 | use database BAKERY_DB; 5 | use schema STG; 6 | 7 | -- generate random data representing supermarket orders 8 | create or replace table STG.COUNTRY_MARKET_ORDERS as 9 | with raw_data as ( 10 | select 11 | dateadd('day', uniform(1, 180, random()), '2023-11-01'::date) as delivery_date, 12 | uniform(1, 14, random()) as product_id, 13 | uniform(500, 1000, random()) as quantity 14 | from table(generator(rowcount => 10000)) 15 | ) 16 | select 17 | 'Country Market' as customer, 18 | delivery_date, 19 | product_id, 20 | sum(quantity) as quantity 21 | from raw_data 22 | group by all; 23 | 24 | -- simulate data anomalies 25 | -- eg. unusually low quantities between March 10 and March 15 26 | update STG.COUNTRY_MARKET_ORDERS 27 | set quantity = 0.2*quantity 28 | where delivery_date between '2024-03-10' and '2024-03-15'; 29 | -- missing data on March 21 and 22 30 | update STG.COUNTRY_MARKET_ORDERS 31 | set quantity = 0 32 | where delivery_date between '2024-03-21' and '2024-03-22'; 33 | 34 | -- view the quantity distribution as a line chart 35 | select * from STG.COUNTRY_MARKET_ORDERS; 36 | 37 | -- grant the create anomaly detection privilege to the DATA_ENGINEER role 38 | use role ACCOUNTADMIN; 39 | grant create SNOWFLAKE.ML.ANOMALY_DETECTION 40 | on schema BAKERY_DB.DQ 41 | to role DATA_ENGINEER; 42 | 43 | -- continue working with the DATA_ENGINEER role in the DQ schema 44 | use role DATA_ENGINEER; 45 | use schema DQ; 46 | 47 | -- historical data before March 1 on which the model trains 48 | create or replace view ORDERS_HISTORICAL_DATA as 49 | select delivery_date::timestamp as delivery_ts, 50 | sum(quantity) as quantity 51 | from STG.COUNTRY_MARKET_ORDERS 52 | where delivery_date < '2024-03-01' 53 | group by delivery_ts; 54 | 55 | -- new data after March 1 on which the model looks for anomalies based on historical trends 56 | create or replace view ORDERS_NEW_DATA as 57 | select delivery_date::timestamp as delivery_ts, 58 | sum(quantity) as quantity 59 | from STG.COUNTRY_MARKET_ORDERS 60 | where delivery_date >= '2024-03-01' 61 | group by delivery_ts; 62 | 63 | -- train the model on historical data 64 | create or replace SNOWFLAKE.ML.ANOMALY_DETECTION orders_model( 65 | input_data => SYSTEM$REFERENCE('VIEW', 'ORDERS_HISTORICAL_DATA'), 66 | timestamp_colname => 'delivery_ts', 67 | target_colname => 'quantity', 68 | label_colname => ''); 69 | 70 | -- calculate anomalies on new data 71 | call orders_model!DETECT_ANOMALIES( 72 | input_data => SYSTEM$REFERENCE('VIEW', 'ORDERS_NEW_DATA'), 73 | timestamp_colname =>'delivery_ts', 74 | target_colname => 'quantity' 75 | ); 76 | 77 | -- save the output to a table 78 | create or replace table ORDERS_MODEL_ANOMALIES as 79 | select * from 
table(result_scan(last_query_id())); 80 | -------------------------------------------------------------------------------- /Chapter_15/Chapter_15_Part1_setup.sql: -------------------------------------------------------------------------------- 1 | use role SYSADMIN; 2 | create database ADMIN_DB; 3 | create schema GIT_INTEGRATION; 4 | 5 | -- continue to use the SYSADMIN role to keep the exercise simple 6 | -- otherwise, create a custom role and grant it the CREATE SECRET ON SCHEMA privilege 7 | -- create the git secret 8 | create or replace secret GIT_SECRET 9 | type = password 10 | username = 11 | password = 12 | ; 13 | 14 | -- use the ACCOUNTADMIN role to create an API integration to keep the exercise simple 15 | -- otherwise, create a custom role and grant it the CREATE INTEGRATION ON ACCOUNT privilege 16 | use role ACCOUNTADMIN; 17 | -- create an API integration with the Git account 18 | create or replace api integration GIT_API_INTEGRATION 19 | API_PROVIDER = git_https_api 20 | -- replace your Git account in the next line (example 'https://github.com/mferle') 21 | API_ALLOWED_PREFIXES = ('https:///') 22 | ALLOWED_AUTHENTICATION_SECRETS = (GIT_SECRET) 23 | ENABLED = TRUE; 24 | 25 | -- grant usage on the integration to the SYSADMIN role 26 | grant usage on integration GIT_API_INTEGRATION to role SYSADMIN; 27 | 28 | -- continue to use the SYSADMIN role to keep the exercise simple 29 | -- otherwise, create a custom role and grant it the CREATE GIT REPOSITORY ON SCHEMA privilege 30 | use role SYSADMIN; 31 | -- create a Git repository using the API integration and the secret 32 | create or replace git repository SF_DE 33 | api_integration = GIT_API_INTEGRATION 34 | git_credentials = GIT_SECRET 35 | -- replace the URL to your repository in the next line 36 | ORIGIN = 'https:////snowflake-data-engineering.git'; 37 | 38 | -- fetch the latest from the Git repository 39 | alter git repository SF_DE fetch; 40 | 41 | -- show branches in the Git repository 42 | show git branches in SF_DE; 43 | 44 | -- list the files in the repository main branch 45 | ls @SF_DE/branches/main; 46 | -------------------------------------------------------------------------------- /Chapter_15/Chapter_15_Part2_RBAC.sql: -------------------------------------------------------------------------------- 1 | use role SYSADMIN; 2 | create warehouse if not exists BAKERY_WH with warehouse_size = 'XSMALL'; 3 | create database if not exists BAKERY_DB; 4 | use database BAKERY_DB; 5 | 6 | -- create schema with managed access using the SYSADMIN role 7 | create or replace schema EXT with managed access; 8 | create or replace schema STG with managed access; 9 | create or replace schema DWH with managed access; 10 | create or replace schema MGMT with managed access; 11 | create or replace schema ORCHESTRATION with managed access; 12 | 13 | -- using the USERADMIN role (because this role has the create or replace role privilege) 14 | use role USERADMIN; 15 | 16 | -- create the access roles for full access and for read-only access 17 | create or replace role BAKERY_FULL; 18 | 19 | -- to make the script repeatable: 20 | -- must use the SECURITYADMIN role (because this role has the MANAGE GRANTS privilege) 21 | -- to drop the BAKERY_READ role (because this role has future grants) 22 | use role SECURITYADMIN; 23 | drop role if exists BAKERY_READ; 24 | 25 | -- go back to the USERADMIN role 26 | use role USERADMIN; 27 | create or replace role BAKERY_READ; 28 | 29 | -- create the functional roles 30 | create or replace role DATA_ENGINEER; 31 | 
create or replace role DATA_ANALYST; 32 | 33 | -- using the SECURITYADMIN role (because this role has the MANAGE GRANTS privilege) 34 | use role SECURITYADMIN; 35 | 36 | -- grant privileges to each of the access roles 37 | 38 | -- grant full privileges on database BAKERY_DB to the BAKERY_FULL role 39 | grant usage on database BAKERY_DB to role BAKERY_FULL; 40 | grant usage on all schemas in database BAKERY_DB to role BAKERY_FULL; 41 | grant all on schema BAKERY_DB.EXT to role BAKERY_FULL; 42 | grant all on schema BAKERY_DB.STG to role BAKERY_FULL; 43 | grant all on schema BAKERY_DB.DWH to role BAKERY_FULL; 44 | grant all on schema BAKERY_DB.MGMT to role BAKERY_FULL; 45 | grant all on schema BAKERY_DB.ORCHESTRATION to role BAKERY_FULL; 46 | 47 | -- grant read-only privileges on database BAKERY_DB to the BAKERY_READ role 48 | grant usage on database BAKERY_DB to role BAKERY_READ; 49 | grant usage on all schemas in database BAKERY_DB to role BAKERY_READ; 50 | -- grant read-only privileges on the MGMT schema to the BAKERY_READ role 51 | grant select on all tables in schema BAKERY_DB.MGMT to role BAKERY_READ; 52 | grant select on all views in schema BAKERY_DB.MGMT to role BAKERY_READ; 53 | 54 | -- grant future privileges 55 | grant select on future tables in schema BAKERY_DB.MGMT to role BAKERY_READ; 56 | grant select on future views in schema BAKERY_DB.MGMT to role BAKERY_READ; 57 | 58 | -- grant access roles to functional roles 59 | -- grant the BAKERY_FULL role to the DATA_ENGINEER role 60 | grant role BAKERY_FULL to role DATA_ENGINEER; 61 | -- grant the BAKERY_READ role to the DATA_ANALYST role 62 | grant role BAKERY_READ to role DATA_ANALYST; 63 | 64 | -- grant both functional roles to the SYSADMIN role 65 | grant role DATA_ENGINEER to role SYSADMIN; 66 | grant role DATA_ANALYST to role SYSADMIN; 67 | 68 | -- grant usage on the BAKERY_WH warehouse to the functional roles 69 | grant usage on warehouse BAKERY_WH to role DATA_ENGINEER; 70 | grant usage on warehouse BAKERY_WH to role DATA_ANALYST; 71 | 72 | -- grant privileges that allow the DATA_ENGINEER role to use the GIT integration 73 | use role SYSADMIN; 74 | grant usage on database ADMIN_DB to role DATA_ENGINEER; 75 | grant usage on schema ADMIN_DB.GIT_INTEGRATION to role DATA_ENGINEER; 76 | grant read on git repository ADMIN_DB.GIT_INTEGRATION.SF_DE to role DATA_ENGINEER; 77 | grant write on git repository ADMIN_DB.GIT_INTEGRATION.SF_DE to role DATA_ENGINEER; 78 | 79 | -- grant the DATA_ENGINEER role the privilege to execute tasks 80 | use role ACCOUNTADMIN; 81 | grant execute task on account to role DATA_ENGINEER; 82 | 83 | -- create a notification integration for sending emails 84 | create or replace notification integration PIPELINE_EMAIL_INT 85 | type = email 86 | enabled = true; 87 | 88 | -- grant usage on the integration to the DATA_ENGINEER role 89 | grant usage on integration PIPELINE_EMAIL_INT to role DATA_ENGINEER; -------------------------------------------------------------------------------- /Chapter_15/Chapter_15_Part3_execute_RBAC.sql: -------------------------------------------------------------------------------- 1 | -- execute the RBAC script using the SYSADMIN role 2 | use role SYSADMIN; 3 | use database ADMIN_DB; 4 | use schema GIT_INTEGRATION; 5 | 6 | alter git repository SF_DE fetch; 7 | 8 | execute immediate from @SF_DE/branches/main/Chapter_15/Chapter_15_Part2_RBAC.sql; 9 | 10 | -- grant the functional roles to the users who perform those business functions 11 | -- in this exercise we grant both functional roles 
to ourself to be able to test them 12 | use role USERADMIN; 13 | grant role DATA_ENGINEER to user ; 14 | grant role DATA_ANALYST to user ; 15 | 16 | -- switch to the DATA_ENGINEER role and verify that you can list files from the Git repository stage 17 | use role DATA_ENGINEER; 18 | use database ADMIN_DB; 19 | use schema GIT_INTEGRATION; 20 | ls @SF_DE/branches/main; 21 | -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/deploy_objects.sql: -------------------------------------------------------------------------------- 1 | use database BAKERY_DB; 2 | -- EXT schema 3 | execute immediate from '../Snowflake_objects/schemas/ext/stages/create_JSON_ORDERS_STAGE.sql'; 4 | execute immediate from '../Snowflake_objects/schemas/ext/tables/create_JSON_ORDERS_EXT.sql'; 5 | execute immediate from '../Snowflake_objects/schemas/ext/streams/create_JSON_ORDERS_STREAM.sql'; 6 | 7 | -- STG schema 8 | execute immediate from '../Snowflake_objects/schemas/stg/tables/create_JSON_ORDERS_TBL_STG.sql'; 9 | execute immediate from '../Snowflake_objects/schemas/stg/tables/create_PARTNER.sql'; 10 | execute immediate from '../Snowflake_objects/schemas/stg/tables/create_PRODUCT.sql'; 11 | execute immediate from '../Snowflake_objects/schemas/stg/streams/create_PARTNER_STREAM.sql'; 12 | execute immediate from '../Snowflake_objects/schemas/stg/streams/create_PRODUCT_STREAM.sql'; 13 | 14 | -- DWH schema 15 | execute immediate from '../Snowflake_objects/schemas/dwh/tables/create_PARTNER_TBL.sql'; 16 | execute immediate from '../Snowflake_objects/schemas/dwh/tables/create_PRODUCT_TBL.sql'; 17 | execute immediate from '../Snowflake_objects/schemas/dwh/views/create_PRODUCT_VALID_TS.sql'; 18 | execute immediate from '../Snowflake_objects/schemas/dwh/dynamic_tables/create_ORDERS_TBL.sql'; 19 | 20 | -- MGMT schema 21 | execute immediate from '../Snowflake_objects/schemas/mgmt/dynamic_tables/create_ORDERS_SUMMARY_TBL.sql'; 22 | 23 | -- ORCHESTRATION schema 24 | execute immediate from '../Snowflake_objects/schemas/orchestration/tasks/create_PIPELINE_START_TASK.sql'; 25 | execute immediate from '../Snowflake_objects/schemas/orchestration/tasks/create_COPY_ORDERS_TASK.sql'; 26 | execute immediate from '../Snowflake_objects/schemas/orchestration/tasks/create_INSERT_ORDERS_STG_TASK.sql'; 27 | execute immediate from '../Snowflake_objects/schemas/orchestration/tasks/create_INSERT_PARTNER_TASK.sql'; 28 | execute immediate from '../Snowflake_objects/schemas/orchestration/tasks/create_INSERT_PRODUCT_TASK.sql'; 29 | execute immediate from '../Snowflake_objects/schemas/orchestration/tasks/create_PIPELINE_END_TASK.sql'; 30 | 31 | 32 | -- snow sql -q "alter git repository ADMIN_DB.GIT_INTEGRATION.SF_DE fetch" 33 | -- snow sql -q "execute immediate from @ADMIN_DB.GIT_INTEGRATION.SF_DE/branches/main/Chapter_15/Snowflake_objects/deploy_objects.sql" -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/resume_tasks.sql: -------------------------------------------------------------------------------- 1 | use database BAKERY_DB; 2 | -- resume all tasks 3 | alter task ORCHESTRATION.PIPELINE_END_TASK resume; 4 | alter task ORCHESTRATION.INSERT_PRODUCT_TASK resume; 5 | alter task ORCHESTRATION.INSERT_PARTNER_TASK resume; 6 | alter task ORCHESTRATION.INSERT_ORDERS_STG_TASK resume; 7 | alter task ORCHESTRATION.COPY_ORDERS_TASK resume; 8 | alter task ORCHESTRATION.PIPELINE_START_TASK resume; 9 | 10 | -- snow sql -q "alter git repository 
ADMIN_DB.GIT_INTEGRATION.SF_DE fetch" 11 | -- snow sql -q "execute immediate from @ADMIN_DB.GIT_INTEGRATION.SF_DE/branches/main/Chapter_15/Snowflake_objects/resume_tasks.sql" -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/dwh/dynamic_tables/create_ORDERS_TBL.sql: -------------------------------------------------------------------------------- 1 | -- create a dynamic table ORDERS_TBL in the DWH schema that normalizes the data from the STG schema 2 | create or replace dynamic table DWH.ORDERS_TBL 3 | target_lag = '1 minute' 4 | warehouse = BAKERY_WH 5 | as 6 | select PT.partner_id, PRD.product_id, ORD.delivery_date, 7 | ORD.order_date, ORD.quantity 8 | from STG.JSON_ORDERS_TBL_STG ORD 9 | inner join STG.PARTNER PT 10 | on PT.partner_name = ORD.customer 11 | inner join STG.PRODUCT PRD 12 | on PRD.product_name = ORD.baked_good_type; 13 | 14 | -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/dwh/tables/create_PARTNER_TBL.sql: -------------------------------------------------------------------------------- 1 | -- create a table in the data warehouse layer and populate initially with the data from the staging layer 2 | create or replace table DWH.PARTNER_TBL as 3 | select * from STG.PARTNER; -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/dwh/tables/create_PRODUCT_TBL.sql: -------------------------------------------------------------------------------- 1 | -- create a table in the data warehouse layer and populate initially with the data from the staging layer 2 | create or replace table DWH.PRODUCT_TBL as 3 | select * from STG.PRODUCT; -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/dwh/views/create_PRODUCT_VALID_TS.sql: -------------------------------------------------------------------------------- 1 | -- create a view in the data warehouse layer that calculates the end timestamp of the validity interval 2 | create or replace view DWH.PRODUCT_VALID_TS as 3 | select 4 | product_id, 5 | product_name, 6 | category, 7 | min_quantity, 8 | price, 9 | valid_from, 10 | NVL( 11 | LEAD(valid_from) over (partition by product_id order by valid_from), 12 | '9999-12-31' 13 | ) as valid_to 14 | from DWH.PRODUCT_TBL 15 | order by product_id; -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/ext/stages/create_JSON_ORDERS_STAGE.sql: -------------------------------------------------------------------------------- 1 | -- create an external stage named JSON_ORDERS_STAGE using the PARK_INN_INTEGRATION as described in Chapter 4 2 | -- be sure to create the external stage with the JSON file format, eg. 
file_format = (type = json) 3 | 4 | --create stage EXT.JSON_ORDERS_STAGE 5 | -- storage_integration = PARK_INN_INTEGRATION 6 | -- url = 'azure://bakeryorders001.blob.core.windows.net/orderfiles' 7 | -- file_format = (type = json); 8 | 9 | -- quick alternative if you don't have the storage integration object: create an internal stage 10 | create or replace stage EXT.JSON_ORDERS_STAGE file_format = (type = json); -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/ext/streams/create_JSON_ORDERS_STREAM.sql: -------------------------------------------------------------------------------- 1 | -- create a stream on the JSON_ORDERS_EXT table 2 | create or replace stream EXT.JSON_ORDERS_STREAM 3 | on table EXT.JSON_ORDERS_EXT; 4 | -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/ext/tables/create_JSON_ORDERS_EXT.sql: -------------------------------------------------------------------------------- 1 | -- create the extract table for the orders in raw (json) format 2 | create or alter table EXT.JSON_ORDERS_EXT ( 3 | customer_orders variant, 4 | source_file_name varchar, 5 | load_ts timestamp 6 | ); -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/mgmt/dynamic_tables/create_ORDERS_SUMMARY_TBL.sql: -------------------------------------------------------------------------------- 1 | -- create a dynamic table in the MGMT schema that summarizes order information for reporting 2 | create or replace dynamic table MGMT.ORDERS_SUMMARY_TBL 3 | target_lag = '1 minute' 4 | warehouse = BAKERY_WH 5 | as 6 | select ORD.delivery_date, PRD.product_name, PRD.category, 7 | sum(ORD.quantity) as total_quantity 8 | from DWH.ORDERS_TBL ORD 9 | left join (select * from DWH.PRODUCT_VALID_TS where valid_to = '9999-12-31') PRD 10 | on ORD.product_id = PRD.product_id 11 | group by all; -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/orchestration/tasks/create_COPY_ORDERS_TASK.sql: -------------------------------------------------------------------------------- 1 | -- create a task that performs the COPY INTO operation from the stage into the table 2 | create or replace task BAKERY_DB.ORCHESTRATION.COPY_ORDERS_TASK 3 | warehouse = BAKERY_WH 4 | after BAKERY_DB.ORCHESTRATION.PIPELINE_START_TASK 5 | as 6 | copy into EXT.JSON_ORDERS_EXT 7 | from ( 8 | select 9 | $1, 10 | metadata$filename, 11 | current_timestamp() 12 | from @EXT.JSON_ORDERS_STAGE 13 | ) 14 | on_error = abort_statement; -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/orchestration/tasks/create_INSERT_ORDERS_STG_TASK.sql: -------------------------------------------------------------------------------- 1 | -- create a task that inserts data from the stream into the staging table 2 | create or replace task BAKERY_DB.ORCHESTRATION.INSERT_ORDERS_STG_TASK 3 | warehouse = 'BAKERY_WH' 4 | after BAKERY_DB.ORCHESTRATION.COPY_ORDERS_TASK 5 | when 6 | system$stream_has_data('EXT.JSON_ORDERS_STREAM') 7 | as 8 | insert into STG.JSON_ORDERS_TBL_STG 9 | select 10 | customer_orders:"Customer"::varchar as customer, 11 | customer_orders:"Order date"::date as order_date, 12 | CO.value:"Delivery date"::date as delivery_date, 13 | DO.value:"Baked good type":: varchar as baked_good_type, 14 | DO.value:"Quantity"::number as quantity, 15 | 
source_file_name, 16 | load_ts 17 | from EXT.JSON_ORDERS_STREAM, 18 | lateral flatten (input => customer_orders:"Orders") CO, 19 | lateral flatten (input => CO.value:"Orders by day") DO; 20 | -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/orchestration/tasks/create_INSERT_PARTNER_TASK.sql: -------------------------------------------------------------------------------- 1 | -- create a task that inserts the partner data from the stream to the target table 2 | create or replace task BAKERY_DB.ORCHESTRATION.INSERT_PARTNER_TASK 3 | warehouse = BAKERY_WH 4 | after BAKERY_DB.ORCHESTRATION.PIPELINE_START_TASK 5 | when 6 | system$stream_has_data('STG.PARTNER_STREAM') 7 | as 8 | insert into DWH.PARTNER_TBL 9 | select partner_id, partner_name, address, rating, valid_from 10 | from PARTNER_STREAM 11 | where METADATA$ACTION = 'INSERT'; 12 | -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/orchestration/tasks/create_INSERT_PRODUCT_TASK.sql: -------------------------------------------------------------------------------- 1 | -- create a task that inserts the product data from the stream to the target table 2 | create or replace task BAKERY_DB.ORCHESTRATION.INSERT_PRODUCT_TASK 3 | warehouse = BAKERY_WH 4 | after BAKERY_DB.ORCHESTRATION.PIPELINE_START_TASK 5 | when 6 | system$stream_has_data('STG.PRODUCT_STREAM') 7 | as 8 | insert into DWH.PRODUCT_TBL 9 | select product_id, product_name, category, 10 | min_quantity, price, valid_from 11 | from STG.PRODUCT_STREAM 12 | where METADATA$ACTION = 'INSERT'; -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/orchestration/tasks/create_PIPELINE_END_TASK.sql: -------------------------------------------------------------------------------- 1 | -- create the finalizer task 2 | create or replace task BAKERY_DB.ORCHESTRATION.PIPELINE_END_TASK 3 | warehouse = BAKERY_WH 4 | finalize = BAKERY_DB.ORCHESTRATION.PIPELINE_START_TASK 5 | as 6 | call SYSTEM$SEND_EMAIL( 7 | 'PIPELINE_EMAIL_INT', 8 | 'firstname.lastname@youremail.com', -- substitute you email address 9 | 'Daily pipeline end', 10 | 'The daily pipeline finished at ' || current_timestamp || '.' 11 | ); -------------------------------------------------------------------------------- /Chapter_15/Snowflake_objects/schemas/orchestration/tasks/create_PIPELINE_START_TASK.sql: -------------------------------------------------------------------------------- 1 | -- create the root task 2 | create or replace task BAKERY_DB.ORCHESTRATION.PIPELINE_START_TASK 3 | warehouse = BAKERY_WH 4 | schedule = '10 M' 5 | as 6 | call SYSTEM$SEND_EMAIL( 7 | 'PIPELINE_EMAIL_INT', 8 | 'firstname.lastname@youremail.com', -- substitute you email address 9 | 'Daily pipeline start', 10 | 'The daily pipeline started at ' || current_timestamp || '.' 
--------------------------------------------------------------------------------
/Chapter_15/Snowflake_objects/schemas/stg/streams/create_PARTNER_STREAM.sql:
--------------------------------------------------------------------------------
-- create a stream on the PARTNER table
create or replace stream STG.PARTNER_STREAM on table STG.PARTNER;
--------------------------------------------------------------------------------
/Chapter_15/Snowflake_objects/schemas/stg/streams/create_PRODUCT_STREAM.sql:
--------------------------------------------------------------------------------
-- create a stream on the PRODUCT table
create or replace stream STG.PRODUCT_STREAM on table STG.PRODUCT;
--------------------------------------------------------------------------------
/Chapter_15/Snowflake_objects/schemas/stg/tables/create_JSON_ORDERS_TBL_STG.sql:
--------------------------------------------------------------------------------
-- create a staging table in the STG schema that will store the flattened semi-structured data from the extraction layer
create or alter table STG.JSON_ORDERS_TBL_STG (
  customer varchar,
  order_date date,
  delivery_date date,
  baked_good_type varchar,
  quantity number,
  source_file_name varchar,
  load_ts timestamp
);
--------------------------------------------------------------------------------
/Chapter_15/Snowflake_objects/schemas/stg/tables/create_PARTNER.sql:
--------------------------------------------------------------------------------
-- create tables in the STG schema, simulating tables populated from the source system using a data integration tool or custom solution
create or alter table STG.PARTNER (
  partner_id integer,
  partner_name varchar,
  address varchar,
  rating varchar,
  valid_from date
);

-- delete the data in case it already exists to avoid duplication
delete from STG.PARTNER;

insert into STG.PARTNER values
  (101, 'Coffee Pocket', '501 Courtney Wells', 'A', '2023-06-01'),
  (102, 'Lily''s Coffee', '2825 Joshua Forest', 'A', '2023-06-01'),
  (103, 'Crave Coffee', '538 Hayden Port', 'B', '2023-06-01'),
  (104, 'Best Burgers', '790 Friedman Valley', 'A', '2023-06-01'),
  (105, 'Page One Fast Food', '44864 Amber Walk', 'B', '2023-06-01'),
  (106, 'Jimmy''s Diner', '2613 Scott Mountains', 'A', '2023-06-01'),
  (107, 'Metro Fine Foods', '520 Castillo Valley', 'A', '2023-06-01'),
  (108, 'New Bistro', '494 Terry Spurs', 'A', '2023-06-01'),
  (109, 'Park Inn', '3692 Nelson Turnpike', 'A', '2023-06-01'),
  (110, 'Chef Supplies', '870 Anthony Hill', 'A', '2023-06-01'),
  (111, 'Farm Fresh', '23633 Melanie Ranch', 'A', '2023-06-01'),
  (112, 'Murphy Mill', '700 Darren Centers', 'A', '2023-06-01');
--------------------------------------------------------------------------------
/Chapter_15/Snowflake_objects/schemas/stg/tables/create_PRODUCT.sql:
--------------------------------------------------------------------------------
-- create tables in the STG schema, simulating tables populated from the source system using a data integration tool or custom solution
create or alter table STG.PRODUCT (
  product_id integer,
  product_name varchar,
  category varchar,
  min_quantity integer,
  price number(18,2),
  valid_from date
);

-- delete the data in case it already exists to avoid duplication
delete from STG.PRODUCT;

insert into STG.PRODUCT values
  (1, 'Baguette', 'Bread', 2, 2.5, '2023-06-01'),
  (2, 'Bagel', 'Bread', 6, 1.3, '2023-06-01'),
  (3, 'English Muffin', 'Bread', 6, 1.2, '2023-06-01'),
  (4, 'Croissant', 'Pastry', 4, 2.1, '2023-06-01'),
  (5, 'White Loaf', 'Bread', 1, 1.8, '2023-06-01'),
  (6, 'Hamburger Bun', 'Bread', 10, 0.9, '2023-06-01'),
  (7, 'Rye Loaf', 'Bread', 1, 3.2, '2023-06-01'),
  (8, 'Whole Wheat Loaf', 'Bread', 1, 2.8, '2023-06-01'),
  (9, 'Muffin', 'Pastry', 12, 3.0, '2023-06-01'),
  (10, 'Cinnamon Bun', 'Pastry', 6, 3.4, '2023-06-01'),
  (11, 'Blueberry Muffin', 'Pastry', 12, 3.6, '2023-06-01'),
  (12, 'Chocolate Muffin', 'Pastry', 12, 3.6, '2023-06-01');
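-- optional sanity check (not required by the pipeline): after the inserts above, the streams on
-- STG.PARTNER and STG.PRODUCT should report pending changes, and on a fresh deployment the
-- captured rows carry METADATA$ACTION = 'INSERT', which is what the INSERT_* tasks filter on
-- select system$stream_has_data('STG.PRODUCT_STREAM');
-- select METADATA$ACTION, count(*) from STG.PRODUCT_STREAM group by METADATA$ACTION;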
--------------------------------------------------------------------------------
/Chapter_15/Snowflake_objects/suspend_tasks.sql:
--------------------------------------------------------------------------------
use database BAKERY_DB;
-- suspend the pipeline so it doesn't continue to consume resources and send emails
alter task ORCHESTRATION.PIPELINE_START_TASK suspend;

-- snow sql -q "alter git repository ADMIN_DB.GIT_INTEGRATION.SF_DE fetch"
-- snow sql -q "execute immediate from @ADMIN_DB.GIT_INTEGRATION.SF_DE/branches/main/Chapter_15/Snowflake_objects/suspend_tasks.sql"
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Snowflake Data Engineering

## Part 1: Introducing Data Engineering with Snowflake
### Chapter 1: Data Engineering with Snowflake
### Chapter 2: Creating Your First Data Pipeline
## Part 2: Ingesting, Transforming, and Storing Data
### Chapter 3: Best Practices for Data Staging
### Chapter 4: Transforming Data
### Chapter 5: Continuous Data Ingestion
### Chapter 6: Executing Code Natively with Snowpark
### Chapter 7: Augmenting Data with Outputs from Large Language Models
### Chapter 8: Optimizing Query Performance
### Chapter 9: Controlling Cost
### Chapter 10: Data Governance and Access Control
## Part 3: Building Data Pipelines
### Chapter 11: Designing Data Pipelines
### Chapter 12: Ingesting Data Incrementally
### Chapter 13: Orchestrating Data Pipelines
### Chapter 14: Testing for Data Integrity and Completeness
### Chapter 15: Data Pipeline Continuous Integration
## Appendices
### Appendix A: Configuring Your Snowflake Environment
### Appendix B: Snowflake Objects used in the Examples
--------------------------------------------------------------------------------