├── DimDate.csv ├── New and Changed Data.zip ├── NewFactSalesView.sql ├── README.md ├── Source Data Small.zip ├── Source Data.zip ├── create_dimensional_objects.sql ├── create_incremental_and_scd_objects.sql ├── create_sql_objects.sql └── livestream └── code.sql /New and Changed Data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datahai/logicaldatawarehouse/da5f1ce71c64a530d1ae3ca74f0e98d41c213272/New and Changed Data.zip -------------------------------------------------------------------------------- /NewFactSalesView.sql: -------------------------------------------------------------------------------- 1 | DROP VIEW LDW.vwFactSales; 2 | GO 3 | 4 | CREATE VIEW LDW.vwFactSales 5 | AS 6 | SELECT *, 7 | CAST(fct.filepath(3) AS DATE) AS SalesOrderPathDate 8 | FROM 9 | OPENROWSET 10 | ( 11 | BULK 'conformed/facts/factsales/*/*/*/*.parquet', 12 | DATA_SOURCE = 'ExternalDataSourceDataLake', 13 | FORMAT = 'Parquet' 14 | ) AS fct -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # logicaldatawarehouse 2 | -------------------------------------------------------------------------------- /Source Data Small.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datahai/logicaldatawarehouse/da5f1ce71c64a530d1ae3ca74f0e98d41c213272/Source Data Small.zip -------------------------------------------------------------------------------- /Source Data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datahai/logicaldatawarehouse/da5f1ce71c64a530d1ae3ca74f0e98d41c213272/Source Data.zip -------------------------------------------------------------------------------- /create_dimensional_objects.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA STG AUTHORIZATION dbo; 2 | 3 | --Create Parquet file format 4 | CREATE EXTERNAL FILE FORMAT SynapseParquetFormat 5 | WITH ( 6 | FORMAT_TYPE = PARQUET 7 | ); 8 | 9 | --Customer 10 | CREATE EXTERNAL TABLE STG.DimCustomer 11 | WITH 12 | ( 13 | LOCATION = 'conformed/dimensions/dimcustomer/01', 14 | DATA_SOURCE = ExternalDataSourceDataLake, 15 | FILE_FORMAT = SynapseParquetFormat 16 | ) 17 | AS 18 | SELECT CAST(ROW_NUMBER() OVER(ORDER BY C.CustomerID) AS INT) AS CustomerKey, 19 | CAST(C.CustomerID AS INT) AS CustomerID, 20 | C.CustomerName, 21 | CC.CustomerCategoryName, 22 | BG.BuyingGroupName, 23 | DM.DeliveryMethodName, 24 | DC.CityName AS DeliveryCityName, 25 | DSP.StateProvinceName AS DeliveryStateProvinceName, 26 | DSP.SalesTerritory AS DeliverySalesTerritory, 27 | DCO.Country AS DeliveryCountry, 28 | DCO.Continent AS DeliveryContinent, 29 | DCO.Region AS DeliveryRegion, 30 | DCO.Subregion AS DeliverySubregion, 31 | CAST('2013-01-01' AS DATE) AS ValidFromDate 32 | FROM LDW.vwCustomers C 33 | LEFT JOIN LDW.vwCustomerCategories CC On CC.CustomerCategoryID = C.CustomerCategoryID 34 | LEFT JOIN LDW.vwCities DC ON DC.CityID = C.DeliveryCityID 35 | LEFT JOIN LDW.vwStateProvinces DSP ON DSP.StateProvinceID = DC.StateProvinceID 36 | LEFT JOIN LDW.vwCountries DCO ON DCO.CountryID = DSP.CountryID 37 | LEFT JOIN LDW.vwBuyingGroups BG ON BG.BuyingGroupID = C.BuyingGroupID 38 | LEFT JOIN LDW.vwDeliveryMethods DM ON DM.DeliveryMethodID = C.DeliveryMethodID 39 | ORDER BY C.CustomerID 40 | 41 | --StockItem 42 | CREATE EXTERNAL TABLE STG.DimStockItem 43 | WITH 44 | ( 45 | LOCATION = 'conformed/dimensions/dimstockitem/01', 46 | DATA_SOURCE = ExternalDataSourceDataLake, 47 | FILE_FORMAT = SynapseParquetFormat 48 | ) 49 | AS 50 | SELECT CAST(ROW_NUMBER() OVER(ORDER BY SI.StockItemID) AS SMALLINT) AS StockItemKey, 51 | CAST(SI.StockItemID AS SMALLINT) AS StockItemID, 52 | SI.StockItemName, 53 | SI.LeadTimeDays, 54 | C.ColorName, 55 | OP.PackageTypeName AS OuterPackageTypeName, 56 | CAST('2013-01-01' AS DATE) AS ValidFromDate 57 | FROM LDW.vwStockItems SI 58 | LEFT JOIN LDW.vwColors C ON C.ColorID = SI.ColorID 59 | LEFT JOIN LDW.vwPackageTypes OP ON OP.PackageTypeID = SI.OuterPackageID 60 | ORDER BY SI.StockItemID 61 | 62 | --Supplier 63 | CREATE EXTERNAL TABLE STG.DimSupplier 64 | WITH 65 | ( 66 | LOCATION = 'conformed/dimensions/dimsupplier/01', 67 | DATA_SOURCE = ExternalDataSourceDataLake, 68 | FILE_FORMAT = SynapseParquetFormat 69 | ) 70 | AS 71 | SELECT CAST(ROW_NUMBER() OVER(ORDER BY S.SupplierID) AS TINYINT) AS SupplierKey, 72 | CAST(S.SupplierID AS TINYINT) AS SupplierID, 73 | S.SupplierName, 74 | SC.SupplierCategoryName, 75 | CAST('2013-01-01' AS DATE) AS ValidFromDate 76 | FROM LDW.vwSuppliers S 77 | LEFT JOIN LDW.vwSupplierCategories SC ON SC.SupplierCategoryID = S.SupplierCategoryID 78 | ORDER BY S.SupplierID; 79 | 80 | --Date 81 | CREATE EXTERNAL TABLE STG.DimDate 82 | WITH 83 | ( 84 | LOCATION = 'conformed/dimensions/dimdate', 85 | DATA_SOURCE = ExternalDataSourceDataLake, 86 | FILE_FORMAT = SynapseParquetFormat 87 | ) 88 | AS 89 | SELECT CAST(DateKey AS INT) AS DateKey, 90 | CAST(Date AS DATE) AS Date, 91 | CAST(Day AS TINYINT) AS Day, 92 | CAST(WeekDay AS TINYINT) AS WeekDay, 93 | WeekDayName, 94 | CAST(Month AS TINYINT) AS Month, 95 | MonthName, 96 | CAST(Quarter AS TINYINT) AS Quarter, 97 | CAST(Year AS SMALLINT) AS Year 98 | FROM 99 | OPENROWSET 100 | ( 101 | BULK 'sourcedatadim/datedim/*.csv', 102 | DATA_SOURCE = 'ExternalDataSourceDataLake', 103 | FORMAT = 'CSV', 104 | PARSER_VERSION = '2.0', 105 | HEADER_ROW = TRUE, 106 | FIELDTERMINATOR ='|' 107 | ) AS fct 108 | 109 | 110 | --Customer 111 | CREATE VIEW LDW.vwDimCustomer 112 | AS 113 | SELECT * FROM 114 | OPENROWSET 115 | ( 116 | BULK 'conformed/dimensions/dimcustomer/*/', 117 | DATA_SOURCE = 'ExternalDataSourceDataLake', 118 | FORMAT = 'Parquet' 119 | ) AS fct 120 | 121 | --StockItem 122 | CREATE VIEW LDW.vwDimStockItem 123 | AS 124 | SELECT * FROM 125 | OPENROWSET 126 | ( 127 | BULK 'conformed/dimensions/dimstockitem/*/', 128 | DATA_SOURCE = 'ExternalDataSourceDataLake', 129 | FORMAT = 'Parquet' 130 | ) AS fct 131 | 132 | --Supplier 133 | CREATE VIEW LDW.vwDimSupplier 134 | AS 135 | SELECT * FROM 136 | OPENROWSET 137 | ( 138 | BULK 'conformed/dimensions/dimsupplier/*/', 139 | DATA_SOURCE = 'ExternalDataSourceDataLake', 140 | FORMAT = 'Parquet' 141 | ) AS fct 142 | 143 | --Date 144 | CREATE VIEW LDW.vwDimDate 145 | AS 146 | SELECT * FROM 147 | OPENROWSET 148 | ( 149 | BULK 'conformed/dimensions/dimdate', 150 | DATA_SOURCE = 'ExternalDataSourceDataLake', 151 | FORMAT = 'Parquet' 152 | ) AS fct 153 | 154 | 155 | CREATE EXTERNAL TABLE STG.FactSales 156 | WITH 157 | ( 158 | LOCATION = 'conformed/facts/factsales/initial', 159 | DATA_SOURCE = ExternalDataSourceDataLake, 160 | FILE_FORMAT = SynapseParquetFormat 161 | ) 162 | AS 163 | SELECT 164 | --Surrogate Keys 165 | DC.CustomerKey, 166 | CAST(FORMAT(SO.OrderDate,'yyyyMMdd') AS INT) as OrderDateKey, 167 | DSI.StockItemKey, 168 | DS.SupplierKey, 169 | --Degenerate Dimensions 170 | CAST(SO.OrderID AS INT) AS OrderID, 171 | CAST(SOL.OrderLineID AS INT) AS OrderLineID, 172 | --Measure 173 | CAST(SOL.Quantity AS INT) AS SalesOrderQuantity, 174 | CAST(SOL.UnitPrice AS DECIMAL(18,2)) AS SalesOrderUnitPrice 175 | FROM LDW.vwSalesOrdersLines SOL 176 | INNER JOIN LDW.vwSalesOrders SO ON SOL.OrderID = SO.OrderID 177 | LEFT JOIN LDW.vwDimCustomer DC ON DC.CustomerID = SO.CustomerID 178 | LEFT JOIN LDW.vwDimStockItem DSI ON DSI.StockItemID = SOL.StockItemID 179 | LEFT JOIN LDW.vwStockItems SI ON SI.StockItemID = DSI.StockItemID 180 | LEFT JOIN LDW.vwDimSupplier DS ON DS.SupplierID = SI.SupplierID; 181 | 182 | 183 | CREATE VIEW LDW.vwFactSales 184 | AS 185 | SELECT * FROM 186 | OPENROWSET 187 | ( 188 | BULK 'conformed/facts/factsales/initial', 189 | DATA_SOURCE = 'ExternalDataSourceDataLake', 190 | FORMAT = 'Parquet' 191 | ) AS fct 192 | 193 | 194 | --Group Sales by Date 195 | SELECT DD.[Year] AS SalesYear, 196 | DD.[Month] AS SalesMonth, 197 | SUM(FS.Quantity) AS SalesOrderQuantity, 198 | SUM(FS.UnitPrice) AS SalesOrderUnitPrice, 199 | COUNT(DISTINCT FS.OrderID) AS SalesOrderTotal 200 | FROM LDW.vwFactSales FS 201 | INNER JOIN LDW.vwDimDate DD ON DD.DateKey = FS.OrderDateKey 202 | GROUP BY DD.[Year], 203 | DD.[Month] 204 | ORDER BY DD.[Year], 205 | DD.[Month]; 206 | 207 | --Group Sales by Customer 208 | SELECT DC.DeliverySalesTerritory, 209 | SUM(FS.Quantity) AS SalesOrderQuantity, 210 | SUM(FS.UnitPrice) AS SalesOrderUnitPrice, 211 | COUNT(DISTINCT OrderID) AS SalesOrderTotal 212 | FROM LDW.vwFactSales FS 213 | INNER JOIN LDW.vwDimCustomer DC ON DC.CustomerKey = FS.CustomerKey 214 | GROUP BY DC.DeliverySalesTerritory 215 | ORDER BY SUM(FS.Quantity) DESC; 216 | 217 | --Group Sales by Supplier 218 | --Note that multiple Suppliers can be linked to a single Sales Order 219 | SELECT DS.SupplierName, 220 | SUM(FS.Quantity) AS SalesOrderQuantity, 221 | SUM(FS.UnitPrice) AS SalesOrderUnitPrice 222 | FROM LDW.vwFactSales FS 223 | INNER JOIN LDW.vwDimSupplier DS ON DS.SupplierKey = FS.SupplierKey 224 | GROUP BY DS.SupplierName 225 | ORDER BY SUM(FS.Quantity) DESC; 226 | 227 | 228 | EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwDimDate'; 229 | 230 | EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwDimSupplier'; 231 | 232 | EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwDimStockItem'; 233 | 234 | EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwDimCustomer'; 235 | 236 | EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwFactSales'; 237 | -------------------------------------------------------------------------------- /create_incremental_and_scd_objects.sql: -------------------------------------------------------------------------------- 1 | --Manual CETAS to load new Sales Order data 2 | CREATE EXTERNAL TABLE STG.FactSales 3 | WITH 4 | ( 5 | LOCATION = 'conformed/facts/factsales/incremental/2021-04-18', 6 | DATA_SOURCE = ExternalDataSourceDataLake, 7 | FILE_FORMAT = SynapseParquetFormat 8 | ) 9 | AS 10 | SELECT 11 | --Surrogate Keys 12 | DC.CustomerKey, 13 | --CAST(FORMAT(SO.OrderDate,'yyyyMMdd') AS INT) as OrderDateKey, 14 | SO.OrderDate, 15 | DSI.StockItemKey, 16 | DS.SupplierKey, 17 | --Degenerate Dimensions 18 | CAST(SO.OrderID AS INT) AS OrderID, 19 | CAST(SOL.OrderLineID AS INT) AS OrderLineID, 20 | --Measure 21 | CAST(SOL.Quantity AS INT) AS SalesOrderQuantity, 22 | CAST(SOL.UnitPrice AS DECIMAL(18,2)) AS SalesOrderUnitPrice 23 | FROM LDW.vwSalesOrdersLines SOL 24 | INNER JOIN LDW.vwSalesOrders SO ON SOL.OrderID = SO.OrderID 25 | LEFT JOIN LDW.vwDimCustomer DC ON DC.CustomerID = SO.CustomerID 26 | LEFT JOIN LDW.vwDimStockItem DSI ON DSI.StockItemID = SOL.StockItemID 27 | LEFT JOIN LDW.vwStockItems SI ON SI.StockItemID = DSI.StockItemID 28 | LEFT JOIN LDW.vwDimSupplier DS ON DS.SupplierID = SI.SupplierID 29 | WHERE SOL.FilePathDate = '2021-04-18' AND SO.FilePathDate = '2021-04-18'; 30 | 31 | --Dynamic SQL with a Stored Procedure to load Sales Data 32 | CREATE PROCEDURE STG.FactSalesLoad @ProcessDate DATE 33 | WITH ENCRYPTION 34 | AS 35 | 36 | BEGIN 37 | 38 | DECLARE @location varchar(100) 39 | 40 | IF OBJECT_ID('STG.FactSales') IS NOT NULL 41 | DROP EXTERNAL TABLE STG.FactSales 42 | 43 | SET @location = CONCAT('conformed/facts/factsales/incremental/',FORMAT (@ProcessDate, 'yyyy/MM/dd') ) 44 | 45 | DECLARE @CreateExternalTableString NVARCHAR(2000) 46 | 47 | SET @CreateExternalTableString = 48 | 'CREATE EXTERNAL TABLE STG.FactSales 49 | WITH 50 | ( 51 | LOCATION = ''' + @location + ''', 52 | DATA_SOURCE = ExternalDataSourceDataLake, 53 | FILE_FORMAT = SynapseParquetFormat 54 | ) 55 | AS 56 | SELECT 57 | --Surrogate Keys 58 | DC.CustomerKey, 59 | CAST(FORMAT(SO.OrderDate,''yyyyMMdd'') AS INT) as OrderDateKey, 60 | DSI.StockItemKey, 61 | DS.SupplierKey, 62 | --Degenerate Dimensions 63 | CAST(SO.OrderID AS INT) AS OrderID, 64 | CAST(SOL.OrderLineID AS INT) AS OrderLineID, 65 | --Measure 66 | CAST(SOL.Quantity AS INT) AS SalesOrderQuantity, 67 | CAST(SOL.UnitPrice AS DECIMAL(18,2)) AS SalesOrderUnitPrice 68 | FROM LDW.vwSalesOrdersLines SOL 69 | INNER JOIN LDW.vwSalesOrders SO ON SOL.OrderID = SO.OrderID 70 | LEFT JOIN LDW.vwDimCustomer DC ON DC.CustomerID = SO.CustomerID 71 | LEFT JOIN LDW.vwDimStockItem DSI ON DSI.StockItemID = SOL.StockItemID 72 | LEFT JOIN LDW.vwStockItems SI ON SI.StockItemID = DSI.StockItemID 73 | LEFT JOIN LDW.vwDimSupplier DS ON DS.SupplierID = SI.SupplierID 74 | WHERE SOL.FilePathDate = ''' + CAST(@ProcessDate AS CHAR(10)) + ''' AND SO.FilePathDate = ''' + CAST(@ProcessDate AS CHAR(10)) + '''' 75 | 76 | EXEC sp_executesql @CreateExternalTableString 77 | 78 | END 79 | 80 | --Run Procedure 81 | EXEC STG.FactSalesLoad '2021-04-19'; 82 | 83 | --Select and Load the Supplier Data Changes 84 | CREATE VIEW LDW.vwIncrementalSuppliers 85 | AS 86 | SELECT fct.*, 87 | fct.filepath(1) AS FilePathDate 88 | FROM 89 | OPENROWSET 90 | ( 91 | BULK 'sourcedatasystem/ChangedData/*/Purchasing_Suppliers/*.csv', 92 | DATA_SOURCE = 'ExternalDataSourceDataLake', 93 | FORMAT = 'CSV', 94 | PARSER_VERSION = '2.0', 95 | HEADER_ROW = TRUE, 96 | FIELDTERMINATOR ='|' 97 | ) AS fct 98 | 99 | --load 100 | DECLARE @MaxKey TINYINT 101 | SELECT @MaxKey = MAX(SupplierKey) FROM LDW.vwDimSupplier 102 | 103 | IF OBJECT_ID('STG.DimSupplier') IS NOT NULL 104 | DROP EXTERNAL TABLE STG.DimSupplier; 105 | 106 | CREATE EXTERNAL TABLE STG.DimSupplier 107 | WITH 108 | ( 109 | LOCATION = 'conformed/dimensions/dimsupplier/02', 110 | DATA_SOURCE = ExternalDataSourceDataLake, 111 | FILE_FORMAT = SynapseParquetFormat 112 | ) 113 | AS 114 | SELECT CAST(ROW_NUMBER() OVER(ORDER BY S.SupplierID) AS TINYINT) + @MaxKey AS SupplierKey, 115 | S.SupplierID, 116 | S.SupplierName, 117 | SC.SupplierCategoryName, 118 | CAST(S.ValidFrom AS DATE) AS ValidFromDate 119 | FROM LDW.vwIncrementalSuppliers S 120 | LEFT JOIN LDW.vwSupplierCategories SC ON SC.SupplierCategoryID = S.SupplierCategoryID 121 | WHERE S.FilePathDate = '2021-06-22' 122 | ORDER BY S.SupplierID; 123 | 124 | --Selecting data from the Supplier Dimension 125 | SELECT * 126 | FROM LDW.vwDimSupplier 127 | WHERE SupplierID IN (5,14) 128 | ORDER BY SupplierID; 129 | 130 | --Create View to construct a complete SCD Type 2 Dimension 131 | CREATE VIEW LDW.vwDimSupplierSCD 132 | AS 133 | SELECT SupplierKey, 134 | SupplierID, 135 | SupplierName, 136 | SupplierCategoryName, 137 | ValidFromDate, 138 | ISNULL(DATEADD(DAY,-1,LEAD(ValidFromDate,1) OVER (PARTITION BY SupplierID ORDER BY SupplierKey)),'2099-01-01') AS ValidToDate, 139 | CASE ROW_NUMBER() OVER(PARTITION BY SupplierID ORDER BY SupplierKey DESC) WHEN 1 THEN 'Y' ELSE 'N' END AS ActiveMember 140 | FROM LDW.vwDimSupplier 141 | 142 | --select from scd dimension view 143 | SELECT * 144 | FROM LDW.vwDimSupplierSCD 145 | WHERE SupplierID IN (1,5,14) 146 | ORDER BY SupplierID,SupplierKey 147 | 148 | --Amend Fact Loading Stored Procedure 149 | CREATE PROCEDURE STG.FactSalesLoadCSD @ProcessDate DATE 150 | WITH ENCRYPTION 151 | AS 152 | 153 | BEGIN 154 | 155 | DECLARE @location varchar(100) 156 | 157 | IF OBJECT_ID('STG.FactSales') IS NOT NULL 158 | DROP EXTERNAL TABLE STG.FactSales 159 | 160 | SET @location = CONCAT('conformed/facts/factsales/incremental/',FORMAT (@ProcessDate, 'yyyy/MM/dd') ) 161 | 162 | DECLARE @CreateExternalTableString NVARCHAR(2000) 163 | 164 | SET @CreateExternalTableString = 165 | 'CREATE EXTERNAL TABLE STG.FactSales 166 | WITH 167 | ( 168 | LOCATION = ''' + @location + ''', 169 | DATA_SOURCE = ExternalDataSourceDataLake, 170 | FILE_FORMAT = SynapseParquetFormat 171 | ) 172 | AS 173 | SELECT 174 | --Surrogate Keys 175 | DC.CustomerKey, 176 | CAST(FORMAT(SO.OrderDate,''yyyyMMdd'') AS INT) as OrderDateKey, 177 | DSI.StockItemKey, 178 | DS.SupplierKey, 179 | --Degenerate Dimensions 180 | CAST(SO.OrderID AS INT) AS OrderID, 181 | CAST(SOL.OrderLineID AS INT) AS OrderLineID, 182 | --Measure 183 | CAST(SOL.Quantity AS INT) AS SalesOrderQuantity, 184 | CAST(SOL.UnitPrice AS DECIMAL(18,2)) AS SalesOrderUnitPrice 185 | FROM LDW.vwSalesOrdersLines SOL 186 | INNER JOIN LDW.vwSalesOrders SO ON SOL.OrderID = SO.OrderID 187 | LEFT JOIN LDW.vwDimCustomer DC ON DC.CustomerID = SO.CustomerID 188 | LEFT JOIN LDW.vwDimStockItem DSI ON DSI.StockItemID = SOL.StockItemID 189 | LEFT JOIN LDW.vwStockItems SI ON SI.StockItemID = DSI.StockItemID 190 | LEFT JOIN LDW.vwDimSupplierSCD DS ON DS.SupplierID = SI.SupplierID AND SO.OrderDate BETWEEN DS.ValidFromDate AND DS.ValidToDate 191 | WHERE SOL.FilePathDate = ''' + CAST(@ProcessDate AS CHAR(10)) + ''' AND SO.FilePathDate = ''' + CAST(@ProcessDate AS CHAR(10)) + '''' 192 | 193 | EXEC sp_executesql @CreateExternalTableString 194 | 195 | END 196 | -------------------------------------------------------------------------------- /create_sql_objects.sql: -------------------------------------------------------------------------------- 1 | 2 | --Create Database and Data Sources 3 | 4 | CREATE DATABASE sqllogicaldw; 5 | 6 | CREATE EXTERNAL DATA SOURCE ExternalDataSourceDataLake 7 | WITH ( 8 | LOCATION = 'https://.dfs.core.windows.net/datalakehouse' 9 | ); 10 | 11 | CREATE SCHEMA LDW authorization dbo; 12 | 13 | CREATE MASTER KEY ENCRYPTION BY PASSWORD = ''; 14 | 15 | CREATE DATABASE SCOPED CREDENTIAL SynapseUserIdentity 16 | WITH IDENTITY = 'User Identity'; 17 | 18 | ALTER DATABASE sqllogicaldw COLLATE Latin1_General_100_BIN2_UTF8; 19 | 20 | /* 21 | CREATE DATABASE SCOPED CREDENTIAL [SasToken] 22 | WITH IDENTITY = 'SHARED ACCESS SIGNATURE', 23 | SECRET = ''; 24 | GO 25 | 26 | CREATE EXTERNAL DATA SOURCE ExternalDataSourceDataLakeSAS 27 | WITH ( LOCATION = 'https://storsynapsedemo.dfs.core.windows.net/datalakehouse', 28 | CREDENTIAL = SasToken 29 | ) 30 | */ 31 | 32 | --Create Views 33 | 34 | CREATE VIEW LDW.vwSalesOrders 35 | AS 36 | SELECT *, 37 | CAST(REPLACE(fct.filepath(1),'OrderDatePartition=','') AS DATE) AS FilePathDate 38 | FROM 39 | OPENROWSET 40 | ( 41 | BULK 'sourcedatapartitionsalesorder/*/*.csv', 42 | DATA_SOURCE = 'ExternalDataSourceDataLake', 43 | FORMAT = 'CSV', 44 | PARSER_VERSION = '2.0', 45 | HEADER_ROW = TRUE, 46 | FIELDTERMINATOR ='|' 47 | ) AS fct 48 | 49 | CREATE VIEW LDW.vwSalesOrdersLines 50 | AS 51 | SELECT *, 52 | CAST(REPLACE(fct.filepath(1),'OrderDate=','') AS DATE) AS FilePathDate 53 | FROM 54 | OPENROWSET 55 | ( 56 | BULK 'sourcedatapartitionsalesorderline/*/*.csv', 57 | DATA_SOURCE = 'ExternalDataSourceDataLake', 58 | FORMAT = 'CSV', 59 | PARSER_VERSION = '2.0', 60 | HEADER_ROW = TRUE, 61 | FIELDTERMINATOR ='|' 62 | ) AS fct 63 | 64 | --Data Related to Sales Orders 65 | 66 | CREATE VIEW LDW.vwCustomers 67 | AS 68 | SELECT * FROM 69 | OPENROWSET 70 | ( 71 | BULK 'sourcedatasystem/Sales_Customers/*.csv', 72 | DATA_SOURCE = 'ExternalDataSourceDataLake', 73 | FORMAT = 'CSV', 74 | PARSER_VERSION = '2.0', 75 | HEADER_ROW = TRUE, 76 | FIELDTERMINATOR ='|' 77 | ) AS fct 78 | 79 | 80 | CREATE VIEW LDW.vwCities 81 | AS 82 | SELECT CityID, 83 | CityName, 84 | StateProvinceID, 85 | LatestRecordedPopulation 86 | FROM 87 | OPENROWSET 88 | ( 89 | BULK 'sourcedatasystem/Application_Cities/*.csv', 90 | DATA_SOURCE = 'ExternalDataSourceDataLake', 91 | FORMAT = 'CSV', 92 | PARSER_VERSION = '2.0', 93 | HEADER_ROW = TRUE, 94 | FIELDTERMINATOR ='|' 95 | ) AS fct 96 | 97 | 98 | CREATE VIEW LDW.vwStateProvinces 99 | AS 100 | SELECT * FROM 101 | OPENROWSET 102 | ( 103 | BULK 'sourcedatasystem/Application_StateProvinces/*.csv', 104 | DATA_SOURCE = 'ExternalDataSourceDataLake', 105 | FORMAT = 'CSV', 106 | PARSER_VERSION = '2.0', 107 | HEADER_ROW = TRUE, 108 | FIELDTERMINATOR ='|' 109 | ) 110 | WITH 111 | ( 112 | StateProvinceID TINYINT, 113 | StateProvinceCode CHAR(2), 114 | StateProvinceName VARCHAR(30), 115 | CountryID TINYINT, 116 | SalesTerritory VARCHAR(14), 117 | LatestRecordedPopulation INT 118 | ) AS fct 119 | 120 | 121 | CREATE VIEW LDW.vwCountries 122 | AS 123 | SELECT * FROM 124 | OPENROWSET 125 | ( 126 | BULK 'sourcedatasystem/Application_Countries/*.csv', 127 | DATA_SOURCE = 'ExternalDataSourceDataLake', 128 | FORMAT = 'CSV', 129 | PARSER_VERSION = '2.0', 130 | FIRSTROW = 2, 131 | FIELDTERMINATOR ='|' 132 | ) 133 | WITH 134 | ( 135 | CountryID TINYINT 1, 136 | Country VARCHAR(50) 2, 137 | IsoCode3 CHAR(3) 4, 138 | CountryType VARCHAR(50) 6, 139 | LatestRecordedPopulation INT 7, 140 | Continent VARCHAR(50) 8, 141 | Region VARCHAR(50) 9, 142 | Subregion VARCHAR(50) 10 143 | ) AS fct 144 | 145 | 146 | CREATE VIEW LDW.vwBuyingGroups 147 | AS 148 | SELECT * FROM 149 | OPENROWSET 150 | ( 151 | BULK 'sourcedatasystem/Sales_BuyingGroups/*.csv', 152 | DATA_SOURCE = 'ExternalDataSourceDataLake', 153 | FORMAT = 'CSV', 154 | PARSER_VERSION = '2.0', 155 | HEADER_ROW = TRUE, 156 | FIELDTERMINATOR ='|' 157 | ) AS fct 158 | 159 | 160 | CREATE VIEW LDW.vwDeliveryMethods 161 | AS 162 | SELECT * FROM 163 | OPENROWSET 164 | ( 165 | BULK 'sourcedatasystem/Application_DeliveryMethods/*.csv', 166 | DATA_SOURCE = 'ExternalDataSourceDataLake', 167 | FORMAT = 'CSV', 168 | PARSER_VERSION = '2.0', 169 | HEADER_ROW = TRUE, 170 | FIELDTERMINATOR ='|' 171 | ) AS fct 172 | 173 | 174 | CREATE VIEW LDW.vwCustomerCategories 175 | AS 176 | SELECT * FROM 177 | OPENROWSET 178 | ( 179 | BULK 'sourcedatasystem/Sales_CustomerCategories/*.csv', 180 | DATA_SOURCE = 'ExternalDataSourceDataLake', 181 | FORMAT = 'CSV', 182 | PARSER_VERSION = '2.0', 183 | HEADER_ROW = TRUE, 184 | FIELDTERMINATOR ='|' 185 | ) AS fct 186 | 187 | 188 | CREATE VIEW LDW.vwPeople 189 | AS 190 | SELECT * FROM 191 | OPENROWSET 192 | ( 193 | BULK 'sourcedatasystem/Application_People/*.csv', 194 | DATA_SOURCE = 'ExternalDataSourceDataLake', 195 | FORMAT = 'CSV', 196 | PARSER_VERSION = '2.0', 197 | HEADER_ROW = TRUE, 198 | FIELDTERMINATOR ='|' 199 | ) AS fct 200 | 201 | --Data Related to Sales Order Lines 202 | 203 | CREATE VIEW LDW.vwSuppliers 204 | AS 205 | SELECT * FROM 206 | OPENROWSET 207 | ( 208 | BULK 'sourcedatasystem/Purchasing_Suppliers/*.csv', 209 | DATA_SOURCE = 'ExternalDataSourceDataLake', 210 | FORMAT = 'CSV', 211 | PARSER_VERSION = '2.0', 212 | HEADER_ROW = TRUE, 213 | FIELDTERMINATOR ='|' 214 | ) AS fct 215 | 216 | 217 | CREATE VIEW LDW.vwSupplierCategories 218 | AS 219 | SELECT * FROM 220 | OPENROWSET 221 | ( 222 | BULK 'sourcedatasystem/Purchasing_SupplierCategories/*.csv', 223 | DATA_SOURCE = 'ExternalDataSourceDataLake', 224 | FORMAT = 'CSV', 225 | PARSER_VERSION = '2.0', 226 | HEADER_ROW = TRUE, 227 | FIELDTERMINATOR ='|' 228 | ) AS fct 229 | 230 | 231 | CREATE VIEW LDW.vwStockItems 232 | AS 233 | SELECT * FROM 234 | OPENROWSET 235 | ( 236 | BULK 'sourcedatasystem/Warehouse_StockItems/*.csv', 237 | DATA_SOURCE = 'ExternalDataSourceDataLake', 238 | FORMAT = 'CSV', 239 | PARSER_VERSION = '2.0', 240 | HEADER_ROW = TRUE, 241 | FIELDTERMINATOR ='|' 242 | ) AS fct 243 | 244 | 245 | CREATE VIEW LDW.vwColors 246 | AS 247 | SELECT * FROM 248 | OPENROWSET 249 | ( 250 | BULK 'sourcedatasystem/Warehouse_Colors/*.csv', 251 | DATA_SOURCE = 'ExternalDataSourceDataLake', 252 | FORMAT = 'CSV', 253 | PARSER_VERSION = '2.0', 254 | HEADER_ROW = TRUE, 255 | FIELDTERMINATOR ='|' 256 | ) AS fct 257 | 258 | 259 | CREATE VIEW LDW.vwPackageTypes 260 | AS 261 | SELECT * FROM 262 | OPENROWSET 263 | ( 264 | BULK 'sourcedatasystem/Warehouse_PackageTypes/*.csv', 265 | DATA_SOURCE = 'ExternalDataSourceDataLake', 266 | FORMAT = 'CSV', 267 | PARSER_VERSION = '2.0', 268 | HEADER_ROW = TRUE, 269 | FIELDTERMINATOR ='|' 270 | ) AS fct 271 | 272 | --Querying Source Data Views 273 | 274 | --Aggregate Queries 275 | 276 | SELECT YEAR(SO.OrderDate) AS OrderDateYear, 277 | COUNT(SO.OrderDate) AS TotalOrderCount 278 | FROM LDW.vwSalesOrders SO 279 | GROUP BY YEAR(SO.OrderDate); 280 | 281 | SELECT ISNULL(C.ColorName,'No Colour') AS ColourName, 282 | SUM(SOL.Quantity) AS TotalOrderLineQuantity, 283 | SUM(SOL.UnitPrice) AS TotalOrderLineUnitPrice 284 | FROM LDW.vwSalesOrdersLines SOL 285 | INNER JOIN LDW.vwStockItems SI ON SI.StockItemID = SOL.StockItemID 286 | LEFT JOIN LDW.vwColors C ON C.ColorID = SI.ColorID 287 | GROUP BY ISNULL(C.ColorName,'No Colour'); 288 | 289 | SELECT 290 | YEAR(SO.OrderDate) AS OrderDateYear, 291 | SC.SupplierCategoryName, 292 | SUM(SOL.Quantity) AS TotalOrderLineQuantity, 293 | SUM(SOL.UnitPrice) AS TotalOrderLineUnitPrice 294 | FROM LDW.vwSalesOrdersLines SOL 295 | INNER JOIN LDW.vwSalesOrders SO ON SO.OrderID = SOL.OrderID 296 | INNER JOIN LDW.vwStockItems SI ON SI.StockItemID = SOL.StockItemID 297 | INNER JOIN LDW.vwSuppliers S ON SI.SupplierID = S.SupplierID 298 | INNER JOIN LDW.vwSupplierCategories SC ON SC.SupplierCategoryID = S.SupplierCategoryID 299 | GROUP BY YEAR(SO.OrderDate), 300 | SC.SupplierCategoryName; 301 | 302 | 303 | --Filtering and Manual Statistics Creation 304 | 305 | SELECT COUNT(SO.OrderID) AS TotalOrderCount 306 | FROM LDW.vwSalesOrders SO 307 | WHERE SO.OrderDate = '2017-02-16' 308 | 309 | 310 | EXEC sys.sp_create_openrowset_statistics N' 311 | SELECT OrderDate 312 | FROM 313 | OPENROWSET 314 | ( 315 | BULK ''sourcedatapartitionsalesorder/*/*.csv'', 316 | DATA_SOURCE = ''ExternalDataSourceDataLake'', 317 | FORMAT = ''CSV'', 318 | PARSER_VERSION = ''2.0'', 319 | HEADER_ROW = TRUE, 320 | FIELDTERMINATOR =''|'' 321 | ) AS fct 322 | ' 323 | 324 | --Pushing Filters down to the Folder 325 | 326 | SELECT YEAR(SO.OrderDate) AS OrderDateYear, 327 | COUNT(SO.OrderDate) AS TotalOrderCount 328 | FROM LDW.vwSalesOrders SO 329 | WHERE SO.FilePathDate = '2017-02-16' 330 | GROUP BY YEAR(SO.OrderDate) 331 | 332 | --Creating Views for Analytical Queries 333 | 334 | CREATE VIEW LDW.vwDimStockItems 335 | AS 336 | SELECT SI.StockItemID, 337 | SI.StockItemName, 338 | SI.LeadTimeDays, 339 | SI.TaxRate, 340 | SI.UnitPrice, 341 | SI.SearchDetails, 342 | PTUnit.PackageTypeName AS PackageTypeNameUnit, 343 | PTOut.PackageTypeName AS PackageTypeNameOuter, 344 | C.ColorName, 345 | S.SupplierName, 346 | S.PaymentDays, 347 | SC.SupplierCategoryName 348 | FROM LDW.vwStockItems SI 349 | LEFT JOIN LDW.vwPackageTypes PTUnit ON PTUnit.PackageTypeID = SI.UnitPackageID 350 | LEFT JOIN LDW.vwPackageTypes PTOut ON PTOut.PackageTypeID = SI.OuterPackageID 351 | LEFT JOIN LDW.vwColors C ON C.ColorID = SI.ColorID 352 | LEFT JOIN LDW.vwSuppliers S ON S.SupplierID = SI.SupplierID 353 | LEFT JOIN LDW.vwSupplierCategories SC ON SC.SupplierCategoryID = S.SupplierCategoryID 354 | 355 | 356 | CREATE VIEW LDW.vwDimCustomers 357 | AS 358 | SELECT C.CustomerID, 359 | C.CustomerName, 360 | C.AccountOpenedDate, 361 | C.CreditLimit, 362 | C.PaymentDays, 363 | CT.CityName AS CityNameDelivery, 364 | SP.StateProvinceCode AS StateProvinceCodeDelivery, 365 | SP.StateProvinceName AS StateProvinceNameDelivery, 366 | SP.SalesTerritory AS SalesTerritoryDelivery, 367 | CR.Country AS CountryDelivery, 368 | CR.Continent AS ContinentDelivery, 369 | CR.Region AS RegionDelivery, 370 | CR.Subregion AS SubregionDelivery, 371 | P.FullName AS PrimaryContactPersonName, 372 | CC.CustomerCategoryName, 373 | BG.BuyingGroupName, 374 | DM.DeliveryMethodName 375 | FROM LDW.vwCustomers C 376 | LEFT JOIN LDW.vwCities CT ON CT.CityID = C.DeliveryCityID 377 | LEFT JOIN LDW.vwStateProvinces SP ON SP.StateProvinceID = CT.StateProvinceID 378 | LEFT JOIN LDW.vwCountries CR ON CR.CountryID = SP.CountryID 379 | LEFT JOIN LDW.vwPeople P ON P.PersonID = C.PrimaryContactPersonID 380 | LEFT JOIN LDW.vwCustomerCategories CC ON CC.CustomerCategoryID = C.CustomerCategoryID 381 | LEFT JOIN LDW.vwBuyingGroups BG ON BG.BuyingGroupID = C.BuyingGroupID 382 | LEFT JOIN LDW.vwDeliveryMethods DM ON DM.DeliveryMethodID = C.DeliveryMethodID 383 | 384 | --Using Analytical Views 385 | 386 | SELECT DC.CustomerCategoryName, 387 | DS.PackageTypeNameUnit, 388 | SUM(SOL.Quantity) AS TotalOrderLineQuantity, 389 | SUM(SOL.UnitPrice) AS TotalOrderLineUnitPrice 390 | FROM LDW.vwSalesOrdersLines SOL 391 | INNER JOIN LDW.vwSalesOrders SO ON SO.OrderID = SOL.OrderID 392 | INNER JOIN LDW.vwDimCustomers DC ON DC.CustomerID = SO.CustomerID 393 | INNER JOIN LDW.vwDimStockItems DS ON DS.StockItemID = SOL.StockItemID 394 | GROUP BY DC.CustomerCategoryName, 395 | DS.PackageTypeNameUnit 396 | -------------------------------------------------------------------------------- /livestream/code.sql: -------------------------------------------------------------------------------- 1 | --FOR DATA FACTORY PIPELINE 2 | SELECT S.name AS SchemaName, 3 | T.name AS TableName 4 | FROM sys.tables T 5 | INNER JOIN sys.schemas S ON S.schema_id = T.schema_id 6 | WHERE S.name = 'SalesLT' 7 | 8 | --Data Factory For Each 9 | @activity('Get Views to Export').output.value 10 | 11 | --create linked service to sql database and add parameters 12 | 13 | --parameter for data lake 14 | sourcedata/sales/@{dataset().folder} 15 | --------------------------------------------------------------------------------