├── LICENSE ├── README.md └── Scripts ├── 01 - DDL.sql ├── 05 - Multi table inserts.sql ├── 12 - Create Warehouses.sql ├── 15 - MultipleWarehouses - 01 - Load Hubs.sql ├── 15 - MultipleWarehouses - 02 - Load Sats.sql ├── 15 - MultipleWarehouses - 03 - Load Links.sql ├── 30 - VARIANT - Data Load.sql └── 40 - BusinessVault.sql /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Dan Galavan, www.galavan.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Optimizing the Data Vault architecture on Snowflake 2 | This repository contains SQL scripts which complement the "Optimizing the Data Vault architecture on the Snowflake Cloud Data Platform" webinar. 3 | 4 | The scripts cover the management of parallel data loads, key management, creation of virtual warehouses, and management of big data on Snowflake. 5 | 6 | An overview of the scripts and link to the webinar can be found here: 7 | https://galavan.com/optimizing-the-data-vault-architecture-on-snowflake-free-sql/ 8 | -------------------------------------------------------------------------------- /Scripts/01 - DDL.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE DATABASE DATA_VAULT; 2 | 3 | USE DATA_VAULT.PUBLIC; 4 | 5 | -- *********************** SqlDBM: Snowflake ************************ 6 | -- ****************************************************************** 7 | 8 | 9 | CREATE SCHEMA IF NOT EXISTS "BIZ"; 10 | 11 | USE DATA_VAULT.PUBLIC; 12 | 13 | 14 | 15 | -- ************************************** "WEATHER_FORECAST_H" 16 | CREATE TABLE IF NOT EXISTS "WEATHER_FORECAST_H" 17 | ( 18 | "FORECAST_PK" varchar NOT NULL, 19 | "FORECAST_BK" varchar NOT NULL, 20 | "LOAD_DTS" timestamp NOT NULL, 21 | "REC_SRC" string NOT NULL, 22 | CONSTRAINT "PK_weather_forecast_h" PRIMARY KEY ( "FORECAST_PK" ), 23 | CONSTRAINT "AK_FORECAST_BK" UNIQUE ( "FORECAST_BK" ) 24 | ); 25 | 26 | 27 | -- ************************************** "SUPPLIER_INVENTORY_H" 28 | CREATE TABLE IF NOT EXISTS "SUPPLIER_INVENTORY_H" 29 | ( 30 | "INVENTORY_PK" varchar NOT NULL, 31 | "INVENTORY_BK" varchar NOT NULL, 32 | "LOAD_DTS" timestamp NOT NULL, 33 | "REC_SRC" varchar NOT NULL, 34 | CONSTRAINT "PK_inventory_h" PRIMARY KEY ( "INVENTORY_PK" ), 35 | CONSTRAINT "AK_SUPPLIER_INVENTORY_H" UNIQUE ( "INVENTORY_BK" ) 36 | ); 37 | 38 | 39 | -- ************************************** "SUPPLIER_H" 40 | CREATE TABLE IF NOT EXISTS "SUPPLIER_H" 41 | ( 42 | "SUPPLIER_PK" varchar NOT NULL, 43 | "SUPPLIER_BK" varchar NOT NULL, 44 | "LOAD_DTS" timestamp NOT NULL, 45 | "REC_SRC" varchar NOT NULL, 46 | CONSTRAINT "PK_supplier_h" PRIMARY KEY ( "SUPPLIER_PK" ), 47 | CONSTRAINT "AK_SUPPLIER_H" UNIQUE ( "SUPPLIER_BK" ) 48 | ); 49 | 50 | 51 | -- ************************************** "PART_H" 52 | CREATE TABLE IF NOT EXISTS "PART_H" 53 | ( 54 | "PART_PK" varchar NOT NULL, 55 | "PART_BK" varchar NOT NULL, 56 | "LOAD_DTS" timestamp NOT NULL, 57 | "REC_SRC" varchar NOT NULL, 58 | CONSTRAINT "PK_part_h" PRIMARY KEY ( "PART_PK" ), 59 | CONSTRAINT "AK_PART_H" UNIQUE ( "PART_BK" ) 60 | ); 61 | 62 | 63 | -- ************************************** "WEATHER_FORECAST_S" 64 | CREATE TABLE IF NOT EXISTS "WEATHER_FORECAST_S" 65 | ( 66 | "FORECAST_H_FK" varchar NOT NULL, 67 | "LOAD_DTS" timestamp NOT NULL, 68 | "FORECAST_MADE_DTS" timestamp NOT NULL, 69 | "FORECAST_ATTRIBUTES" variant NOT NULL, 70 | "HASH_DIFF" varchar NOT NULL, 71 | "REC_SRC" varchar NOT NULL, 72 | CONSTRAINT "PK_weather_forecast_s" PRIMARY KEY ( "FORECAST_H_FK", "LOAD_DTS" ), 73 | CONSTRAINT "Forecast_rel" FOREIGN KEY ( "FORECAST_H_FK" ) REFERENCES "WEATHER_FORECAST_H" ( "FORECAST_PK" ) 74 | ); 75 | 76 | 77 | -- ************************************** "SUPPLIER_S" 78 | CREATE TABLE IF NOT EXISTS "SUPPLIER_S" 79 | ( 80 | "SUPPLIER_H_FK" varchar NOT NULL, 81 | "LOAD_DTS" timestamp NOT NULL, 82 | "NAME" varchar, 83 | "ADDRESS" varchar, 84 | "PHONE" varchar, 85 | "ACCTBAL" varchar, 86 | "NATIONCODE" varchar, 87 | "HASH_DIFF" varchar NOT NULL, 88 | "REC_SRC" varchar NOT NULL, 89 | CONSTRAINT "PK_supplier_s" PRIMARY KEY ( "SUPPLIER_H_FK", "LOAD_DTS" ), 90 | CONSTRAINT "FK_S_H_S_SAT" FOREIGN KEY ( "SUPPLIER_H_FK" ) REFERENCES "SUPPLIER_H" ( "SUPPLIER_PK" ) 91 | ); 92 | 93 | 94 | -- ************************************** "SUPPLIER_INVENTORY_S" 95 | CREATE TABLE IF NOT EXISTS "SUPPLIER_INVENTORY_S" 96 | ( 97 | "INVENTORY_H_PK" varchar NOT NULL, 98 | "LOAD_DTS" timestamp NOT NULL, 99 | "SUPPLY_COST" number(12,2), 100 | "AVAILABLE_QTY" number(38,0), 101 | "PART_BK" varchar NOT NULL, 102 | "SUPPLIER_BK" varchar NOT NULL, 103 | "HASH_DIFF" varchar NOT NULL, 104 | "REC_SRC" varchar NOT NULL, 105 | CONSTRAINT "PK_supplier_inventory_s" PRIMARY KEY ( "INVENTORY_H_PK", "LOAD_DTS" ), 106 | CONSTRAINT "FK_I_SUP_INV" FOREIGN KEY ( "INVENTORY_H_PK" ) REFERENCES "SUPPLIER_INVENTORY_H" ( "INVENTORY_PK" ) 107 | ); 108 | 109 | 110 | -- ************************************** "SUPPLIER_INVENTORY_L" 111 | CREATE TABLE IF NOT EXISTS "SUPPLIER_INVENTORY_L" 112 | ( 113 | "SUPPLIER_INVENTORY_L_PK" varchar NOT NULL, 114 | "PART_PK" varchar NOT NULL, 115 | "SUPPLIER_PK" varchar NOT NULL, 116 | "INVENTORY_PK" varchar NOT NULL, 117 | "LOAD_DTS" varchar NOT NULL, 118 | "REC_SRC" varchar NOT NULL, 119 | CONSTRAINT "PK_supplier_inventory_l" PRIMARY KEY ( "SUPPLIER_INVENTORY_L_PK" ), 120 | CONSTRAINT "AK_S_I" UNIQUE ( "SUPPLIER_PK", "INVENTORY_PK", "PART_PK" ), 121 | CONSTRAINT "FK_PART_TO_SUPPLIER_INVENTORY" FOREIGN KEY ( "PART_PK" ) REFERENCES "PART_H" ( "PART_PK" ), 122 | CONSTRAINT "FK_S_I" FOREIGN KEY ( "INVENTORY_PK" ) REFERENCES "SUPPLIER_INVENTORY_H" ( "INVENTORY_PK" ), 123 | CONSTRAINT "FK_S_SUP_INV" FOREIGN KEY ( "SUPPLIER_PK" ) REFERENCES "SUPPLIER_H" ( "SUPPLIER_PK" ) 124 | ); 125 | 126 | 127 | -- ************************************** "PART_S" 128 | CREATE TABLE IF NOT EXISTS "PART_S" 129 | ( 130 | "PART_H_FK" varchar NOT NULL, 131 | "LOAD_DTS" timestamp NOT NULL, 132 | "PART_NAME" varchar, 133 | "PART_MANUFACTURER" varchar, 134 | "PART_BRAND" varchar, 135 | "PART_TYPE" varchar, 136 | "PART_SIZE" number(10), 137 | "PART_CONTAINER" varchar, 138 | "PART_RETAIL_PRICE" number(12,2), 139 | "HASH_DIFF" varchar NOT NULL, 140 | "REC_SRC" varchar NOT NULL, 141 | CONSTRAINT "PK_part_s" PRIMARY KEY ( "PART_H_FK", "LOAD_DTS" ), 142 | CONSTRAINT "FK_PART_H" FOREIGN KEY ( "PART_H_FK" ) REFERENCES "PART_H" ( "PART_PK" ) 143 | ) 144 | COMMENT = 'The Parts Satellite.'; 145 | 146 | 147 | -- ************************************** "BIZ"."VW_OPENWEATHER_FORECAST_MV" 148 | CREATE OR REPLACE MATERIALIZED VIEW BIZ.VW_OPENWEATHER_FORECAST_MV 149 | AS 150 | 151 | SELECT S.FORECAST_H_FK 152 | ,S.FORECAST_MADE_DTS 153 | ,S.FORECAST_ATTRIBUTES:city.country::STRING COUNTRY_CODE 154 | ,S.FORECAST_ATTRIBUTES:city.name::STRING CITY 155 | ,TO_TIMESTAMP(D.value:dt::STRING) Weather_TIMESTAMP 156 | 157 | ,(D.value:temp.day::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_DAYTIME -- convert Kelvin to Celcius 158 | ,(D.value:temp.min::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_MIN -- convert Kelvin to Celcius 159 | ,(D.value:temp.max::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_MAX -- convert Kelvin to Celcius 160 | 161 | ,W.value:description::STRING WEATHER_DESCRIPTION 162 | 163 | FROM DATA_VAULT."PUBLIC".WEATHER_FORECAST_S S 164 | ,LATERAL FLATTEN (input => S.FORECAST_ATTRIBUTES, path => 'data') D 165 | ,LATERAL FLATTEN (INPUT => D.value:weather) W 166 | 167 | WHERE S.FORECAST_ATTRIBUTES:city.country::STRING = 'DE' 168 | AND S.FORECAST_ATTRIBUTES:city.name::STRING = ('Munich'); 169 | 170 | -- ************************************** "BIZ"."VW_OPENWEATHER_FORECAST" 171 | Create OR REPLACE View BIZ.VW_OPENWEATHER_FORECAST 172 | 173 | Comment = 'Open Weather Map data' 174 | 175 | AS 176 | ( 177 | SELECT S.FORECAST_H_FK 178 | 179 | ,S.FORECAST_ATTRIBUTES:city.country::STRING COUNTRY_CODE 180 | ,S.FORECAST_ATTRIBUTES:city.name::STRING CITY 181 | ,TO_TIMESTAMP(D.value:dt::STRING) Weather_TIMESTAMP 182 | 183 | ,(D.value:temp.day::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_DAYTIME -- convert Kelvin to Celcius 184 | ,(D.value:temp.min::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_MIN -- convert Kelvin to Celcius 185 | ,(D.value:temp.max::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_MAX -- convert Kelvin to Celcius 186 | 187 | ,W.value:description::STRING WEATHER_DESCRIPTION 188 | 189 | FROM DATA_VAULT."PUBLIC".WEATHER_FORECAST_S S 190 | ,LATERAL FLATTEN (input => S.FORECAST_ATTRIBUTES, path => 'data') D 191 | ,LATERAL FLATTEN (INPUT => D.value:weather) W 192 | 193 | WHERE S.FORECAST_ATTRIBUTES:city.country::STRING = 'DE' 194 | AND S.FORECAST_ATTRIBUTES:city.name::STRING = ('Munich') 195 | AND S.FORECAST_MADE_DTS > (DATEADD(month,-6, CURRENT_TIMESTAMP())) 196 | ORDER BY S.FORECAST_MADE_DTS DESC, Weather_TIMESTAMP DESC 197 | ); 198 | 199 | -------------------------------------------------------------------------------- /Scripts/05 - Multi table inserts.sql: -------------------------------------------------------------------------------- 1 | ------------------------------------------------- 2 | -- MULTI-TABLE INSERTS 3 | ------------------------------------------------- 4 | 5 | USE DATABASE SNOWFLAKE_SAMPLE_DATA; 6 | USE SCHEMA TPCH_SF1; 7 | 8 | SET REC_SRC = 'SNOWFLAKE_SAMPLE_DATABASE'; 9 | 10 | 11 | -- Insert into all tables within the same transation 12 | -- With Overwrite, Truncation occurs within the same transaction aswell. 13 | 14 | ------------------------------------- 15 | ------- Supplier Hub & Sat 16 | ------------------------------------- 17 | INSERT OVERWRITE ALL 18 | INTO DATA_VAULT."PUBLIC".SUPPLIER_H 19 | VALUES (PK,S_SUPPKEY,CT,RS) 20 | 21 | INTO DATA_VAULT."PUBLIC".SUPPLIER_S (SUPPLIER_H_FK, LOAD_DTS, NAME, ADDRESS, PHONE, ACCTBAL, NATIONCODE, HASH_DIFF, REC_SRC) 22 | VALUES (PK,CT,S_NAME,S_ADDRESS,S_PHONE,S_ACCTBAL,S_NATIONKEY,HASH_DIFF,RS) 23 | 24 | SELECT MD5(S.S_SUPPKEY) AS PK -- Add preferred Hashing approach 25 | ,S.S_SUPPKEY 26 | ,CURRENT_TIMESTAMP() CT 27 | ,S_NAME 28 | ,S_ADDRESS 29 | ,S_PHONE 30 | ,S_ACCTBAL 31 | ,S_NATIONKEY 32 | ,MD5(S_NAME || S_ADDRESS || S_PHONE || S_ACCTBAL || S_NATIONKEY) HASH_DIFF -- Add preferred Hashing approach 33 | ,$REC_SRC RS 34 | FROM SUPPLIER S; 35 | 36 | 37 | 38 | -------------------------------- 39 | -- Part Hub & Sat 40 | -------------------------------- 41 | 42 | INSERT OVERWRITE ALL 43 | 44 | -- Hub PART_H 45 | INTO DATA_VAULT."PUBLIC".PART_H 46 | VALUES (PK,P_PARTKEY,CT,RS) 47 | 48 | -- Sat PART_S 49 | INTO DATA_VAULT."PUBLIC".PART_S (PART_H_FK, LOAD_DTS, PART_NAME, PART_MANUFACTURER, PART_BRAND, PART_TYPE, PART_SIZE, PART_CONTAINER, PART_RETAIL_PRICE, HASH_DIFF, REC_SRC) 50 | VALUES (PK,CT,P_NAME,P_MFGR,P_BRAND,P_TYPE,P_SIZE,P_CONTAINER,P_RETAILPRICE,HASH_DIFF,RS) 51 | 52 | -- Source 53 | SELECT MD5(P.P_PARTKEY) AS PK -- Add preferred Hashing approach 54 | ,P.P_PARTKEY 55 | ,CURRENT_TIMESTAMP() CT 56 | ,P.P_NAME 57 | ,P_MFGR 58 | ,P_BRAND 59 | ,P_TYPE 60 | ,P_SIZE 61 | ,P_CONTAINER 62 | ,P_RETAILPRICE 63 | 64 | ,MD5(P.P_NAME || P_MFGR || P_BRAND || P_TYPE || P_SIZE || P_CONTAINER || P_RETAILPRICE ) HASH_DIFF -- Add preferred Hashing approach 65 | ,$REC_SRC RS 66 | --,'' RS 67 | FROM PART P 68 | ; 69 | 70 | -------------------------------- 71 | ---- Inventory Hub, Sat, Link 72 | -------------------------------- 73 | 74 | INSERT OVERWRITE ALL 75 | INTO DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_H 76 | VALUES (INVENTORY_PK,INVENTORY_BK,CT,RS) 77 | 78 | INTO DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_S (INVENTORY_H_PK, LOAD_DTS, SUPPLY_COST, AVAILABLE_QTY, PART_BK, SUPPLIER_BK, HASH_DIFF, REC_SRC) 79 | VALUES (INVENTORY_PK,CT,PS_SUPPLYCOST,PS_AVAILQTY,PS_PARTKEY,PS_SUPPKEY,HASH_DIFF,RS) 80 | 81 | INTO DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_L (SUPPLIER_INVENTORY_L_PK, PART_PK, SUPPLIER_PK, INVENTORY_PK, LOAD_DTS, REC_SRC) 82 | VALUES (INVENTORY_L_PK,PART_PK,SUPPLIER_PK,INVENTORY_PK,CT,RS) 83 | 84 | 85 | SELECT PS_PARTKEY || PS_SUPPKEY AS INVENTORY_BK 86 | ,MD5(INVENTORY_BK) AS INVENTORY_PK -- Add preferred Hashing approach 87 | ,MD5(PS_PARTKEY) AS PART_PK -- Add preferred Hashing approach 88 | ,MD5(PS_SUPPKEY) AS SUPPLIER_PK -- Add preferred Hashing approach 89 | ,MD5(PS_PARTKEY || PS_SUPPKEY || INVENTORY_BK) AS INVENTORY_L_PK 90 | ,PS_AVAILQTY 91 | ,PS_SUPPLYCOST 92 | ,PS_PARTKEY 93 | ,PS_SUPPKEY 94 | ,MD5(PS_AVAILQTY || PS_SUPPLYCOST) HASH_DIFF -- Add preferred Hashing approach 95 | ,CURRENT_TIMESTAMP() CT 96 | ,$REC_SRC RS 97 | --,'' RS 98 | FROM PARTSUPP P; -------------------------------------------------------------------------------- /Scripts/12 - Create Warehouses.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dangalavan/Optimizing-DataVault-on-Snowflake/c9f57e417c5c7ad0e9ce6b23acc492d75a79c60e/Scripts/12 - Create Warehouses.sql -------------------------------------------------------------------------------- /Scripts/15 - MultipleWarehouses - 01 - Load Hubs.sql: -------------------------------------------------------------------------------- 1 | /* 2 | * -- RESET 3 | 4 | TRUNCATE TABLE DATA_VAULT."PUBLIC".PART_H; 5 | TRUNCATE TABLE DATA_VAULT."PUBLIC".SUPPLIER_H; 6 | TRUNCATE TABLE DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_H; 7 | */ 8 | 9 | 10 | USE DATABASE SNOWFLAKE_SAMPLE_DATA; 11 | 12 | USE SCHEMA TPCH_SF10; 13 | 14 | SET REC_SRC = 'SNOWFLAKE_SAMPLE_DATABASE'; 15 | 16 | USE WAREHOUSE WH_ETL_HUBS; 17 | 18 | ----------------- 19 | -- Part Hub 20 | ----------------- 21 | INSERT INTO DATA_VAULT."PUBLIC".PART_H 22 | (PART_PK,PART_BK,LOAD_DTS,REC_SRC) 23 | 24 | SELECT MD5(P.P_PARTKEY) AS PK -- Add preferred Hashing approach 25 | ,P.P_PARTKEY 26 | ,CURRENT_TIMESTAMP() CT 27 | ,$REC_SRC RS 28 | FROM PART P 29 | WHERE PK NOT IN (SELECT H.PART_PK FROM DATA_VAULT."PUBLIC".PART_H H); 30 | 31 | ----------------- 32 | -- Supplier Hub 33 | ----------------- 34 | INSERT INTO DATA_VAULT."PUBLIC".SUPPLIER_H 35 | (SUPPLIER_PK,SUPPLIER_BK,LOAD_DTS,REC_SRC) 36 | 37 | SELECT MD5(S.S_SUPPKEY) AS PK -- Add preferred Hashing approach 38 | ,S.S_SUPPKEY 39 | ,CURRENT_TIMESTAMP() CT 40 | ,$REC_SRC RS 41 | FROM SUPPLIER S 42 | WHERE PK NOT IN (SELECT H.SUPPLIER_PK FROM DATA_VAULT."PUBLIC".SUPPLIER_H H); 43 | 44 | ----------------- 45 | -- Inventory Hub 46 | ----------------- 47 | 48 | INSERT INTO DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_H 49 | (INVENTORY_PK,INVENTORY_BK,LOAD_DTS,REC_SRC) 50 | SELECT MD5(PS_PARTKEY || PS_SUPPKEY) AS INVENTORY_PK -- Add preferred Hashing approach 51 | ,PS_PARTKEY || PS_SUPPKEY AS INVENTORY_BK 52 | ,CURRENT_TIMESTAMP() CT 53 | ,$REC_SRC RS 54 | --,'' RS 55 | FROM PARTSUPP P 56 | WHERE INVENTORY_PK NOT IN (SELECT H.INVENTORY_PK FROM DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_H H); 57 | 58 | 59 | -------------------------------------------------------------------------------- /Scripts/15 - MultipleWarehouses - 02 - Load Sats.sql: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * -- RESET 4 | 5 | TRUNCATE TABLE DATA_VAULT."PUBLIC".PART_S; 6 | TRUNCATE TABLE DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_S; 7 | TRUNCATE TABLE DATA_VAULT."PUBLIC".SUPPLIER_S; 8 | */ 9 | 10 | USE DATABASE SNOWFLAKE_SAMPLE_DATA; 11 | 12 | USE SCHEMA TPCH_SF10; 13 | 14 | SET REC_SRC = 'SNOWFLAKE_SAMPLE_DATABASE'; 15 | 16 | ------------------------------------- 17 | ------- Supplier Sat 18 | ------------------------------------- 19 | USE WAREHOUSE WH_ETL_SATS; 20 | 21 | 22 | INSERT INTO DATA_VAULT."PUBLIC".SUPPLIER_S (SUPPLIER_H_FK, LOAD_DTS, NAME, ADDRESS, PHONE, ACCTBAL, NATIONCODE, HASH_DIFF, REC_SRC) 23 | 24 | SELECT MD5(S.S_SUPPKEY) AS PK -- Add preferred Hashing approach 25 | ,CURRENT_TIMESTAMP() AS CT 26 | 27 | ,S_NAME 28 | ,S_ADDRESS 29 | ,S_PHONE 30 | ,S_ACCTBAL 31 | ,S_NATIONKEY 32 | 33 | ,MD5(S_NAME || S_ADDRESS || S_PHONE || S_ACCTBAL || S_NATIONKEY) HASH_DIFF -- Add preferred Hashing approach 34 | ,$REC_SRC RS 35 | FROM SUPPLIER S 36 | WHERE HASH_DIFF NOT IN (SELECT HASH_DIFF FROM DATA_VAULT."PUBLIC".SUPPLIER_S) 37 | ; 38 | 39 | 40 | -------------------------------- 41 | -- Part Sat 42 | -------------------------------- 43 | 44 | INSERT INTO DATA_VAULT."PUBLIC".PART_S (PART_H_FK, LOAD_DTS, PART_NAME, PART_MANUFACTURER, PART_BRAND, PART_TYPE, PART_SIZE, PART_CONTAINER, PART_RETAIL_PRICE, HASH_DIFF, REC_SRC) 45 | -- Source 46 | SELECT MD5(P.P_PARTKEY) AS PK -- Add preferred Hashing approach 47 | ,CURRENT_TIMESTAMP() CT 48 | 49 | ,P.P_NAME 50 | ,P_MFGR 51 | ,P_BRAND 52 | ,P_TYPE 53 | ,P_SIZE 54 | ,P_CONTAINER 55 | ,P_RETAILPRICE 56 | 57 | ,MD5(P.P_NAME || P_MFGR || P_BRAND || P_TYPE || P_SIZE || P_CONTAINER || P_RETAILPRICE ) HASH_DIFF -- Add preferred Hashing approach 58 | ,$REC_SRC RS 59 | --,'' RS 60 | FROM PART P 61 | WHERE HASH_DIFF NOT IN (SELECT HASH_DIFF FROM DATA_VAULT."PUBLIC".PART_S); 62 | 63 | 64 | 65 | -------------------------------- 66 | ---- Inventory Sat 67 | -------------------------------- 68 | 69 | 70 | INSERT INTO DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_S (INVENTORY_H_PK, LOAD_DTS, SUPPLY_COST, AVAILABLE_QTY, PART_BK, SUPPLIER_BK, HASH_DIFF, REC_SRC) 71 | 72 | SELECT MD5(PS_PARTKEY || PS_SUPPKEY) AS INVENTORY_PK -- Add preferred Hashing approach 73 | ,CURRENT_TIMESTAMP() AS CT 74 | 75 | ,PS_SUPPLYCOST 76 | ,PS_AVAILQTY 77 | 78 | ,PS_PARTKEY 79 | ,PS_SUPPKEY 80 | 81 | ,MD5(PS_AVAILQTY || PS_SUPPLYCOST) HASH_DIFF -- Add preferred Hashing approach 82 | 83 | ,$REC_SRC RS 84 | --,'' RS 85 | FROM PARTSUPP P 86 | WHERE HASH_DIFF NOT IN (SELECT HASH_DIFF FROM DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_S) 87 | ; 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /Scripts/15 - MultipleWarehouses - 03 - Load Links.sql: -------------------------------------------------------------------------------- 1 | /* 2 | * -- RESET 3 | 4 | TRUNCATE TABLE DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_L; 5 | */ 6 | 7 | USE DATABASE SNOWFLAKE_SAMPLE_DATA; 8 | 9 | USE SCHEMA TPCH_SF1; 10 | 11 | SET REC_SRC = 'SNOWFLAKE_SAMPLE_DATABASE'; 12 | 13 | USE WAREHOUSE WH_ETL_LINKS; 14 | 15 | 16 | -------------------------------- 17 | ---- Inventory Link 18 | -------------------------------- 19 | 20 | 21 | INSERT INTO DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_L 22 | ( 23 | SUPPLIER_INVENTORY_L_PK 24 | , PART_PK 25 | , SUPPLIER_PK 26 | , INVENTORY_PK 27 | , LOAD_DTS 28 | , REC_SRC 29 | ) 30 | 31 | WITH cte_InvLink 32 | AS 33 | ( 34 | SELECT (PS_PARTKEY || PS_SUPPKEY) AS INVENTORY_BK -- Add preferred Hashing approach 35 | ,MD5(PS_PARTKEY || PS_SUPPKEY || INVENTORY_BK) AS SUPPLIER_INVENTORY_L_PK 36 | ,CURRENT_TIMESTAMP() CT 37 | 38 | ,MD5(INVENTORY_BK) AS INVENTORY_PK -- Add preferred Hashing approach 39 | ,MD5(PS_PARTKEY) AS PART_PK -- Add preferred Hashing approach 40 | ,MD5(PS_SUPPKEY) AS SUPPLIER_PK -- Add preferred Hashing approach 41 | 42 | ,$REC_SRC RS 43 | --,'' RS 44 | 45 | FROM PARTSUPP P 46 | WHERE SUPPLIER_INVENTORY_L_PK NOT IN (SELECT L.SUPPLIER_INVENTORY_L_PK FROM DATA_VAULT."PUBLIC".SUPPLIER_INVENTORY_L L) 47 | ) 48 | 49 | 50 | SELECT SUPPLIER_INVENTORY_L_PK 51 | ,PART_PK 52 | ,SUPPLIER_PK 53 | ,INVENTORY_PK 54 | ,CT 55 | ,RS 56 | FROM cte_InvLink; 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /Scripts/30 - VARIANT - Data Load.sql: -------------------------------------------------------------------------------- 1 | USE DATABASE SNOWFLAKE_SAMPLE_DATA; 2 | USE SCHEMA WEATHER; 3 | SET REC_SRC = 'SNOWFLAKE_SAMPLE_DATABASE_WEATHER'; 4 | 5 | /* 6 | --RESET 7 | 8 | TRUNCATE TABLE DATA_VAULT.PUBLIC.WEATHER_FORECAST_H; 9 | TRUNCATE TABLE DATA_VAULT.PUBLIC.WEATHER_FORECAST_S; 10 | 11 | */ 12 | 13 | -- https://openweathermap.org/forecast16#JSON 14 | 15 | 16 | USE WAREHOUSE COMPUTE_WH; 17 | 18 | ALTER WAREHOUSE COMPUTE_WH SET WAREHOUSE_SIZE = 'MEDIUM'; 19 | 20 | INSERT ALL 21 | 22 | ---------------------------------- 23 | -- Hub - Weather Forecast 24 | ---------------------------------- 25 | WHEN (SELECT COUNT(*) 26 | FROM DATA_VAULT.PUBLIC.WEATHER_FORECAST_H H 27 | WHERE H.FORECAST_PK = PK) = 0 28 | THEN 29 | INTO DATA_VAULT.PUBLIC.WEATHER_FORECAST_H(FORECAST_BK,FORECAST_PK,LOAD_DTS,REC_SRC) 30 | VALUES (BK, PK, LOAD_DTS, REC_SRC) 31 | 32 | ---------------------------------- 33 | -- Sat - Weather Forecast 34 | ---------------------------------- 35 | WHEN (SELECT COUNT(*) 36 | FROM DATA_VAULT.PUBLIC.WEATHER_FORECAST_S S 37 | WHERE S.HASH_DIFF = HASH_DIFF) = 0 38 | THEN 39 | INTO DATA_VAULT.PUBLIC.WEATHER_FORECAST_S (FORECAST_H_FK,LOAD_DTS,FORECAST_MADE_DTS, FORECAST_ATTRIBUTES,HASH_DIFF,REC_SRC) 40 | VALUES (PK, LOAD_DTS, FORECAST_MADE_DTS, VARIANT_PAYLOAD, HASH_DIFF, REC_SRC) 41 | 42 | ------------------------------ 43 | -- Source ("Staging") 44 | ------------------------------ 45 | SELECT (W.T::STRING || '-' || W.V:city.id::STRING) AS BK -- Concatenate JSON timestamp and City ID 46 | ,MD5(BK) AS PK -- Add preferred Hashing approach 47 | ,CURRENT_TIMESTAMP() AS LOAD_DTS 48 | ,w.T AS FORECAST_MADE_DTS 49 | ,w.V AS VARIANT_PAYLOAD 50 | ,MD5(w.V) AS HASH_DIFF 51 | ,$REC_SRC AS REC_SRC 52 | FROM "SNOWFLAKE_SAMPLE_DATA"."WEATHER"."DAILY_14_TOTAL" W; 53 | 54 | 55 | ALTER WAREHOUSE COMPUTE_WH SET WAREHOUSE_SIZE = 'XSMALL'; -------------------------------------------------------------------------------- /Scripts/40 - BusinessVault.sql: -------------------------------------------------------------------------------- 1 | USE DATABASE DATA_VAULT; 2 | 3 | USE SCHEMA PUBLIC; 4 | 5 | USE WAREHOUSE COMPUTE_WH; 6 | 7 | ALTER WAREHOUSE COMPUTE_WH SET WAREHOUSE_SIZE = 'SMALL'; 8 | 9 | /* Create OR REPLACE View BIZ.VW_OPENWEATHER_FORECAST 10 | Comment = 'Open Weather Map data' 11 | AS ( */ 12 | 13 | SELECT S.FORECAST_H_FK 14 | ,S.FORECAST_ATTRIBUTES:city.country::STRING COUNTRY_CODE 15 | ,S.FORECAST_ATTRIBUTES:city.name::STRING CITY 16 | ,TO_TIMESTAMP(D.value:dt::STRING) Weather_TIMESTAMP 17 | 18 | ,(D.value:temp.day::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_DAYTIME -- convert Kelvin to Celcius 19 | ,(D.value:temp.min::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_MIN -- convert Kelvin to Celcius 20 | ,(D.value:temp.max::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_MAX -- convert Kelvin to Celcius 21 | 22 | ,W.value:description::STRING WEATHER_DESCRIPTION 23 | 24 | FROM DATA_VAULT."PUBLIC".WEATHER_FORECAST_S S 25 | ,LATERAL FLATTEN (input => S.FORECAST_ATTRIBUTES, path => 'data') D 26 | ,LATERAL FLATTEN (INPUT => D.value:weather) W 27 | 28 | WHERE S.FORECAST_ATTRIBUTES:city.country::STRING = 'DE' 29 | AND S.FORECAST_ATTRIBUTES:city.name::STRING = ('Munich') 30 | AND S.FORECAST_MADE_DTS > (DATEADD(month,-6, CURRENT_TIMESTAMP())) 31 | ORDER BY S.FORECAST_MADE_DTS DESC, Weather_TIMESTAMP DESC 32 | --) 33 | ; 34 | 35 | ALTER WAREHOUSE COMPUTE_WH SET WAREHOUSE_SIZE = 'XSMALL'; 36 | 37 | 38 | -- view results 39 | SELECT * 40 | FROM DATA_VAULT.BIZ.VW_OPENWEATHER_FORECAST; 41 | 42 | 43 | -------------------------------------------------------------------------------------------------------------------- 44 | ------------------------------------------------------------------------------------------------------------------------ 45 | -- Optimize 46 | -- 47 | -- Background process automatically maintains the data in the materialzed view. 48 | -- Credit costs are tracked in a Snowflake-provided virtual warehouse named MATERIALIZED_VIEW_MAINTENANCE. 49 | ------------------------------------------------------------------------------------------------------------------------ 50 | ------------------------------------------------------------------------------------------------------------------------ 51 | 52 | -- 1. DDL 53 | CREATE OR REPLACE MATERIALIZED VIEW BIZ.VW_OPENWEATHER_FORECAST_MV 54 | AS 55 | 56 | SELECT S.FORECAST_H_FK 57 | ,S.FORECAST_MADE_DTS 58 | ,S.FORECAST_ATTRIBUTES:city.country::STRING COUNTRY_CODE 59 | ,S.FORECAST_ATTRIBUTES:city.name::STRING CITY 60 | ,TO_TIMESTAMP(D.value:dt::STRING) Weather_TIMESTAMP 61 | 62 | ,(D.value:temp.day::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_DAYTIME -- convert Kelvin to Celcius 63 | ,(D.value:temp.min::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_MIN -- convert Kelvin to Celcius 64 | ,(D.value:temp.max::decimal(10,2) - 273.15) TEMPERATURE_CELCIUS_MAX -- convert Kelvin to Celcius 65 | 66 | ,W.value:description::STRING WEATHER_DESCRIPTION 67 | 68 | FROM DATA_VAULT."PUBLIC".WEATHER_FORECAST_S S 69 | ,LATERAL FLATTEN (input => S.FORECAST_ATTRIBUTES, path => 'data') D 70 | ,LATERAL FLATTEN (INPUT => D.value:weather) W 71 | 72 | WHERE S.FORECAST_ATTRIBUTES:city.country::STRING = 'DE' 73 | AND S.FORECAST_ATTRIBUTES:city.name::STRING = ('Munich'); 74 | 75 | 76 | -- 2. View results 77 | 78 | SELECT * 79 | FROM BIZ.VW_OPENWEATHER_FORECAST_MV MV 80 | WHERE MV.FORECAST_MADE_DTS > (DATEADD(month,-6, CURRENT_TIMESTAMP())) 81 | ORDER BY MV.FORECAST_MADE_DTS DESC 82 | ,MV.Weather_TIMESTAMP DESC; 83 | 84 | 85 | --------------------------------------------------------------------------------