├── README.md ├── integrationRuntime └── integrationRuntime1.json ├── SQL └── Control Database │ ├── Templates │ ├── template.Load.TableType.sql │ ├── Complete │ │ ├── CleanSales.sql │ │ └── CleanProduct.sql │ ├── template.Load.Table.sql │ ├── template.Base.Table.sql │ ├── template.Load.ImportData.sql │ └── template.Base.CleanData.sql │ └── Scripts │ └── CreateDatabase.sql ├── linkedService ├── Data Warehouse Key Vault.json ├── mdwa_datalake.json ├── Salesforce1.json ├── CosmosDb.json ├── mdwa_azure_sql_database.json └── mdwa_control_database.json ├── dataset ├── mdwa_control_database.json ├── mdwa_cosmos_db.json ├── SalesforceObject1.json ├── mdwa_sql_database_parameter.json ├── mdwa_datalake_delimited_parameter.json ├── mdwa_sql_database_simple.json ├── PartitionedCsvFile.json └── DirtyCSVFile.json ├── pipeline ├── pipeline1.json ├── pipeline2.json ├── Populate Cosmos DB.json ├── Pause Synapse Analytics.json └── Get Metadata.json ├── dataflow ├── dataflow2.json └── Cleansing Data Flow.json ├── .vscode └── launch.json ├── .gitignore └── PowerShell └── Code Generation └── GenerateScripts.ps1 /README.md: -------------------------------------------------------------------------------- 1 | # Modern-Data-Warehouse-In-Azure -------------------------------------------------------------------------------- /integrationRuntime/integrationRuntime1.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "integrationRuntime1", 3 | "properties": { 4 | "type": "SelfHosted" 5 | } 6 | } -------------------------------------------------------------------------------- /SQL/Control Database/Templates/template.Load.TableType.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE Load.%%ENTITY_TABLE_NAME%%Type AS TABLE 2 | ( 3 | %%COLUMN_LIST_AS_DETYPED%% 4 | ) 5 | -------------------------------------------------------------------------------- /SQL/Control 
Database/Templates/Complete/CleanSales.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MattTheHow/Modern-Data-Warehouse-In-Azure/HEAD/SQL/Control Database/Templates/Complete/CleanSales.sql -------------------------------------------------------------------------------- /SQL/Control Database/Templates/Complete/CleanProduct.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MattTheHow/Modern-Data-Warehouse-In-Azure/HEAD/SQL/Control Database/Templates/Complete/CleanProduct.sql -------------------------------------------------------------------------------- /SQL/Control Database/Templates/template.Load.Table.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Load.%%ENTITY_TABLE_NAME%% 2 | ( 3 | LoadId INT NOT NULL, 4 | LoadDateTime DATETIME NOT NULL, 5 | FileName VARCHAR(100) NOT NULL, 6 | %%COLUMN_LIST_AS_DETYPED%% 7 | ) 8 | -------------------------------------------------------------------------------- /linkedService/Data Warehouse Key Vault.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Data Warehouse Key Vault", 3 | "properties": { 4 | "annotations": [], 5 | "type": "AzureKeyVault", 6 | "typeProperties": { 7 | "baseUrl": "https://OS-training-keyvault-mh.vault.azure.net/" 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /SQL/Control Database/Templates/template.Base.Table.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Base.%%ENTITY_TABLE_NAME%% 2 | ( 3 | LoadId INT NOT NULL 4 | ,LoadDateTime DATETIME NOT NULL 5 | ,FileName VARCHAR(100) NOT NULL 6 | %%COLUMN_LIST_WITH_DATATYPES%% 7 | %%SCD_2_HASH%% 8 | ) 9 | -------------------------------------------------------------------------------- 
/dataset/mdwa_control_database.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mdwa_control_database", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "mdwa_control_database", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "AzureSqlTable", 10 | "schema": [] 11 | } 12 | } -------------------------------------------------------------------------------- /dataset/mdwa_cosmos_db.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mdwa_cosmos_db", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "CosmosDb", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "CosmosDbSqlApiCollection", 10 | "typeProperties": { 11 | "collectionName": "mdwa" 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /dataset/SalesforceObject1.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "SalesforceObject1", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "Salesforce1", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "SalesforceObject", 10 | "schema": [], 11 | "typeProperties": { 12 | "objectApiName": "mytable__c" 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /linkedService/mdwa_datalake.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mdwa_datalake", 3 | "type": "Microsoft.DataFactory/factories/linkedservices", 4 | "properties": { 5 | "annotations": [], 6 | "type": "AzureBlobFS", 7 | "typeProperties": { 8 | "url": "https://mdwastorage.dfs.core.windows.net", 9 | "encryptedCredential": 
"ew0KICAiVmVyc2lvbiI6ICIyMDE3LTExLTMwIiwNCiAgIlByb3RlY3Rpb25Nb2RlIjogIktleSIsDQogICJTZWNyZXRDb250ZW50VHlwZSI6ICJQbGFpbnRleHQiLA0KICAiQ3JlZGVudGlhbElkIjogIk1EV0EtREFUQUZBQ1RPUllfYzkzZTJhN2YtYTRjZC00YWYyLTk1NjktYWQ2ZWJmOGE1YjM3Ig0KfQ==" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /linkedService/Salesforce1.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Salesforce1", 3 | "type": "Microsoft.DataFactory/factories/linkedservices", 4 | "properties": { 5 | "annotations": [], 6 | "type": "Salesforce", 7 | "typeProperties": { 8 | "environmentUrl": "https://login.salesforce.com", 9 | "username": "gfdgdf", 10 | "encryptedCredential": "ew0KICAiVmVyc2lvbiI6ICIyMDE3LTExLTMwIiwNCiAgIlByb3RlY3Rpb25Nb2RlIjogIktleSIsDQogICJTZWNyZXRDb250ZW50VHlwZSI6ICJQbGFpbnRleHQiLA0KICAiQ3JlZGVudGlhbElkIjogIk1EV0EtREFUQUZBQ1RPUllfOTU0ZGFlYjAtZGVjYy00ZTdhLWE0NjQtZTQxYjI4MjUwOTgwIg0KfQ==" 11 | } 12 | } 13 | } -------------------------------------------------------------------------------- /linkedService/CosmosDb.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "CosmosDb", 3 | "type": "Microsoft.DataFactory/factories/linkedservices", 4 | "properties": { 5 | "annotations": [], 6 | "type": "CosmosDb", 7 | "typeProperties": { 8 | "connectionString": "AccountEndpoint=https://mdwa-cosmosdb.documents.azure.com:443/;Database=mdwa-records;", 9 | "encryptedCredential": "ew0KICAiVmVyc2lvbiI6ICIyMDE3LTExLTMwIiwNCiAgIlByb3RlY3Rpb25Nb2RlIjogIktleSIsDQogICJTZWNyZXRDb250ZW50VHlwZSI6ICJQbGFpbnRleHQiLA0KICAiQ3JlZGVudGlhbElkIjogIk1EV0EtREFUQUZBQ1RPUllfNjBmYzJlNDAtYjZjYi00ZWQ2LWEyNDMtMWQ4YmQ2MDQ1OTc5Ig0KfQ==" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /SQL/Control Database/Templates/template.Load.ImportData.sql: -------------------------------------------------------------------------------- 1 
| CREATE PROC Load.Raw%%ENTITY_TABLE_NAME%%_to_Load%%ENTITY_TABLE_NAME%% 2 | ( 3 | @Data Load.%%ENTITY_TABLE_NAME%%Type READONLY, 4 | @LoadId INT = NULL, 5 | @FileName VARCHAR(100) = NULL, 6 | @UTCDateTime DATETIME 7 | ) 8 | AS 9 | BEGIN 10 | 11 | DECLARE @InsertCount INT = 0 12 | 13 | INSERT INTO Load.%%ENTITY_TABLE_NAME%% 14 | SELECT 15 | @LoadId 16 | ,@UTCDateTime 17 | ,@FileName 18 | %%COLUMN_LIST%% 19 | FROM 20 | @Data 21 | 22 | END -------------------------------------------------------------------------------- /linkedService/mdwa_azure_sql_database.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mdwa_azure_sql_database", 3 | "type": "Microsoft.DataFactory/factories/linkedservices", 4 | "properties": { 5 | "annotations": [], 6 | "type": "AzureSqlDatabase", 7 | "typeProperties": { 8 | "connectionString": "integrated security=False;encrypt=True;connection timeout=30;data source=mdwa-sqlserver.database.windows.net;initial catalog=DemoDB;user id=MattHow", 9 | "encryptedCredential": "ew0KICAiVmVyc2lvbiI6ICIyMDE3LTExLTMwIiwNCiAgIlByb3RlY3Rpb25Nb2RlIjogIktleSIsDQogICJTZWNyZXRDb250ZW50VHlwZSI6ICJQbGFpbnRleHQiLA0KICAiQ3JlZGVudGlhbElkIjogIk1EV0EtREFUQUZBQ1RPUllfMTFlMzU2NjItNDk0ZS00ODNjLWEwMjctOTdhODY0NTdkOGFkIg0KfQ==" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /dataset/mdwa_sql_database_parameter.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mdwa_sql_database_parameter", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "mdwa_azure_sql_database", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "parameters": { 9 | "TableName": { 10 | "type": "string" 11 | }, 12 | "SchemaName": { 13 | "type": "string" 14 | } 15 | }, 16 | "annotations": [], 17 | "type": "AzureSqlTable", 18 | "schema": [], 19 | "typeProperties": { 20 | "schema": { 21 | "value": "@dataset().SchemaName", 22 | "type": 
"Expression" 23 | }, 24 | "table": { 25 | "value": "@dataset().TableName", 26 | "type": "Expression" 27 | } 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /linkedService/mdwa_control_database.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mdwa_control_database", 3 | "type": "Microsoft.DataFactory/factories/linkedservices", 4 | "properties": { 5 | "annotations": [], 6 | "type": "AzureSqlDatabase", 7 | "typeProperties": { 8 | "connectionString": "integrated security=False;encrypt=True;connection timeout=30;data source=mdwa-sqlserver.database.windows.net;initial catalog=\"Demo Control DB\";user id=MattHow", 9 | "encryptedCredential": "ew0KICAiVmVyc2lvbiI6ICIyMDE3LTExLTMwIiwNCiAgIlByb3RlY3Rpb25Nb2RlIjogIktleSIsDQogICJTZWNyZXRDb250ZW50VHlwZSI6ICJQbGFpbnRleHQiLA0KICAiQ3JlZGVudGlhbElkIjogIk1EV0EtREFUQUZBQ1RPUllfNjliMWU5MGQtMmJkOC00MWQ3LTk0MDItNGE1OTM1NmZjZjY1Ig0KfQ==" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /pipeline/pipeline1.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pipeline1", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "Execute Cleansing Data Flow", 7 | "type": "ExecuteDataFlow", 8 | "dependsOn": [], 9 | "policy": { 10 | "timeout": "7.00:00:00", 11 | "retry": 0, 12 | "retryIntervalInSeconds": 30, 13 | "secureOutput": false, 14 | "secureInput": false 15 | }, 16 | "userProperties": [], 17 | "typeProperties": { 18 | "dataflow": { 19 | "referenceName": "Cleansing Data Flow", 20 | "type": "DataFlowReference" 21 | }, 22 | "compute": { 23 | "coreCount": 8, 24 | "computeType": "General" 25 | } 26 | } 27 | } 28 | ], 29 | "annotations": [] 30 | } 31 | } -------------------------------------------------------------------------------- /dataset/mdwa_datalake_delimited_parameter.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "mdwa_datalake_delimited_parameter", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "mdwa_datalake", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "parameters": { 9 | "FileName": { 10 | "type": "string" 11 | } 12 | }, 13 | "annotations": [], 14 | "type": "DelimitedText", 15 | "typeProperties": { 16 | "location": { 17 | "type": "AzureBlobFSLocation", 18 | "fileName": { 19 | "value": "@dataset().FileName", 20 | "type": "Expression" 21 | }, 22 | "folderPath": "RAW", 23 | "fileSystem": "datalake" 24 | }, 25 | "columnDelimiter": ",", 26 | "escapeChar": "\\", 27 | "firstRowAsHeader": true, 28 | "quoteChar": "\"" 29 | }, 30 | "schema": [] 31 | } 32 | } -------------------------------------------------------------------------------- /pipeline/pipeline2.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pipeline2", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "dataflow2", 7 | "type": "ExecuteDataFlow", 8 | "dependsOn": [], 9 | "policy": { 10 | "timeout": "7.00:00:00", 11 | "retry": 0, 12 | "retryIntervalInSeconds": 30, 13 | "secureOutput": false, 14 | "secureInput": false 15 | }, 16 | "userProperties": [], 17 | "typeProperties": { 18 | "dataflow": { 19 | "referenceName": "dataflow2", 20 | "type": "DataFlowReference" 21 | }, 22 | "compute": { 23 | "coreCount": 8, 24 | "computeType": "General" 25 | } 26 | } 27 | }, 28 | { 29 | "name": "Switch1", 30 | "type": "Switch", 31 | "dependsOn": [], 32 | "userProperties": [], 33 | "typeProperties": { 34 | "cases": [ 35 | { 36 | "value": "8" 37 | } 38 | ] 39 | } 40 | } 41 | ], 42 | "annotations": [] 43 | } 44 | } -------------------------------------------------------------------------------- /dataset/mdwa_sql_database_simple.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": 
"mdwa_sql_database_simple", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "mdwa_azure_sql_database", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "AzureSqlTable", 10 | "schema": [ 11 | { 12 | "name": "SalesSystemId", 13 | "type": "int", 14 | "precision": 10 15 | }, 16 | { 17 | "name": "SalesPerson", 18 | "type": "nvarchar" 19 | }, 20 | { 21 | "name": "SalesAmount", 22 | "type": "decimal", 23 | "precision": 10, 24 | "scale": 2 25 | }, 26 | { 27 | "name": "ProductName", 28 | "type": "nvarchar" 29 | }, 30 | { 31 | "name": "ProductId", 32 | "type": "int", 33 | "precision": 10 34 | }, 35 | { 36 | "name": "CustomerId", 37 | "type": "int", 38 | "precision": 10 39 | } 40 | ], 41 | "typeProperties": { 42 | "schema": "dbo", 43 | "table": "FactInternetSales" 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- /dataset/PartitionedCsvFile.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PartitionedCsvFile", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "mdwa_datalake", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobFSLocation", 13 | "folderPath": "RAW/Partitioned", 14 | "fileSystem": "datalake" 15 | }, 16 | "columnDelimiter": ",", 17 | "escapeChar": "\\", 18 | "firstRowAsHeader": true, 19 | "quoteChar": "\"" 20 | }, 21 | "schema": [ 22 | { 23 | "name": "SalesSystemId", 24 | "type": "String" 25 | }, 26 | { 27 | "name": "SalesPerson", 28 | "type": "String" 29 | }, 30 | { 31 | "name": "SalesAmount", 32 | "type": "String" 33 | }, 34 | { 35 | "name": "ProductName", 36 | "type": "String" 37 | }, 38 | { 39 | "name": "ProductId", 40 | "type": "String" 41 | }, 42 | { 43 | "name": "CustomerId", 44 | "type": "String" 45 | } 46 | ] 47 | } 48 | } 
-------------------------------------------------------------------------------- /dataset/DirtyCSVFile.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "DirtyCSVFile", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "mdwa_datalake", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobFSLocation", 13 | "fileName": "DemoSales.csv", 14 | "folderPath": "RAW", 15 | "fileSystem": "datalake" 16 | }, 17 | "columnDelimiter": ",", 18 | "escapeChar": "\\", 19 | "firstRowAsHeader": true, 20 | "quoteChar": "\"" 21 | }, 22 | "schema": [ 23 | { 24 | "name": "SalesSystemId", 25 | "type": "String" 26 | }, 27 | { 28 | "name": "SalesPerson", 29 | "type": "String" 30 | }, 31 | { 32 | "name": "SalesAmount", 33 | "type": "String" 34 | }, 35 | { 36 | "name": "ProductName", 37 | "type": "String" 38 | }, 39 | { 40 | "name": "ProductId", 41 | "type": "String" 42 | }, 43 | { 44 | "name": "CustomerId", 45 | "type": "String" 46 | } 47 | ] 48 | } 49 | } -------------------------------------------------------------------------------- /pipeline/Populate Cosmos DB.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Populate Cosmos DB", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "Copy Warehouse Data", 7 | "type": "Copy", 8 | "dependsOn": [], 9 | "policy": { 10 | "timeout": "7.00:00:00", 11 | "retry": 0, 12 | "retryIntervalInSeconds": 30, 13 | "secureOutput": false, 14 | "secureInput": false 15 | }, 16 | "userProperties": [], 17 | "typeProperties": { 18 | "source": { 19 | "type": "AzureSqlSource", 20 | "sqlReaderQuery": "SELECT TOP 10\n\tCustomerKey,\n\tSUM(SalesAmount) AS TotalSalesAmount,\n\tCOUNT(*) AS OrderCount\nFROM \n\t[dbo].[FactInternetSales]\nGROUP BY CustomerKey", 21 | "queryTimeout": "02:00:00" 22 | }, 23 | "sink": { 24 | "type": 
"CosmosDbSqlApiSink", 25 | "writeBehavior": "insert" 26 | }, 27 | "enableStaging": false 28 | }, 29 | "inputs": [ 30 | { 31 | "referenceName": "mdwa_sql_database_simple", 32 | "type": "DatasetReference" 33 | } 34 | ], 35 | "outputs": [ 36 | { 37 | "referenceName": "mdwa_cosmos_db", 38 | "type": "DatasetReference" 39 | } 40 | ] 41 | } 42 | ], 43 | "annotations": [] 44 | } 45 | } -------------------------------------------------------------------------------- /dataflow/dataflow2.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dataflow2", 3 | "properties": { 4 | "type": "MappingDataFlow", 5 | "typeProperties": { 6 | "sources": [ 7 | { 8 | "dataset": { 9 | "referenceName": "mdwa_sql_database_simple", 10 | "type": "DatasetReference" 11 | }, 12 | "name": "SQLDataSource" 13 | } 14 | ], 15 | "sinks": [ 16 | { 17 | "dataset": { 18 | "referenceName": "PartitionedCsvFile", 19 | "type": "DatasetReference" 20 | }, 21 | "name": "PartitionedDataSet" 22 | } 23 | ], 24 | "transformations": [], 25 | "script": "\n\nsource(output(\n\t\tSalesSystemId as integer,\n\t\tSalesPerson as string,\n\t\tSalesAmount as decimal(10,2),\n\t\tProductName as string,\n\t\tProductId as integer,\n\t\tCustomerId as integer\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tisolationLevel: 'READ_UNCOMMITTED',\n\tformat: 'table') ~> SQLDataSource\nSQLDataSource sink(input(\n\t\tSalesSystemId as string,\n\t\tSalesPerson as string,\n\t\tSalesAmount as string,\n\t\tProductName as string,\n\t\tProductId as string,\n\t\tCustomerId as string\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tpartitionBy('key',\n\t\t0,\n\t\tCustomerId\n\t)) ~> PartitionedDataSet" 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 
3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "PowerShell Launch Current File", 9 | "type": "PowerShell", 10 | "request": "launch", 11 | "script": "${file}", 12 | "args": [], 13 | "cwd": "${file}" 14 | }, 15 | { 16 | "name": "PowerShell Launch Current File in Temporary Console", 17 | "type": "PowerShell", 18 | "request": "launch", 19 | "script": "${file}", 20 | "args": [], 21 | "cwd": "${file}", 22 | "createTemporaryIntegratedConsole": true 23 | }, 24 | { 25 | "name": "PowerShell Launch Current File w/Args Prompt", 26 | "type": "PowerShell", 27 | "request": "launch", 28 | "script": "${file}", 29 | "args": [ 30 | "${command:SpecifyScriptArgs}" 31 | ], 32 | "cwd": "${file}" 33 | }, 34 | { 35 | "name": "PowerShell Attach to Host Process", 36 | "type": "PowerShell", 37 | "request": "attach" 38 | }, 39 | { 40 | "name": "PowerShell Interactive Session", 41 | "type": "PowerShell", 42 | "request": "launch", 43 | "cwd": "" 44 | }, 45 | { 46 | "name": "PowerShell Attach Interactive Session Runspace", 47 | "type": "PowerShell", 48 | "request": "attach", 49 | "processId": "current" 50 | } 51 | ] 52 | } -------------------------------------------------------------------------------- /pipeline/Pause Synapse Analytics.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Pause Synapse Analytics", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "Pause Synapse Analytics", 7 | "description": "", 8 | "type": "WebActivity", 9 | "dependsOn": [ 10 | { 11 | "activity": "Obtain Bearer Token", 12 | "dependencyConditions": [ 13 | "Succeeded" 14 | ] 15 | } 16 | ], 17 | "policy": { 18 | "timeout": "7.00:00:00", 19 | "retry": 0, 20 | "retryIntervalInSeconds": 30, 21 | "secureOutput": false, 22 | "secureInput": false 23 | }, 24 | "userProperties": [], 25 | "typeProperties": { 
26 | "url": "https://management.azure.com/subscriptions/d6c9085f-1bef-4f0b-85e8-a30400cbbd0c/resourceGroups/moderndw/providers/Microsoft.Sql/servers/mdwa-sqlserver/databases/mdwa-sqlwarehouse/pause?api-version=2017-10-01-preview", 27 | "method": "POST", 28 | "headers": { 29 | "Authorization": { 30 | "value": "@concat('Bearer ', activity('Obtain Bearer Token').output.access_token)", 31 | "type": "Expression" 32 | }, 33 | "Content-Type": "application/json" 34 | } 35 | } 36 | }, 37 | { 38 | "name": "Get Client Secret", 39 | "description": "Fetches the service principal secret from Key Vault using the data factory's managed identity, so the secret is not stored in this pipeline. Expects a secret named 'synapse-pause-client-secret' in the vault.", 40 | "type": "WebActivity", 41 | "dependsOn": [], 42 | "policy": { 43 | "timeout": "7.00:00:00", 44 | "retry": 0, 45 | "retryIntervalInSeconds": 30, 46 | "secureOutput": true, 47 | "secureInput": false 48 | }, 49 | "userProperties": [], 50 | "typeProperties": { 51 | "url": "https://OS-training-keyvault-mh.vault.azure.net/secrets/synapse-pause-client-secret?api-version=7.0", 52 | "method": "GET", 53 | "authentication": { 54 | "type": "MSI", 55 | "resource": "https://vault.azure.net" 56 | } 57 | } 58 | }, 59 | { 60 | "name": "Obtain Bearer Token", 61 | "description": "Requests an Azure Resource Manager token with the client-credentials grant. Input and output are secured because they carry the client secret and the access token.", 62 | "type": "WebActivity", 63 | "dependsOn": [ 64 | { 65 | "activity": "Get Client Secret", 66 | "dependencyConditions": [ 67 | "Succeeded" 68 | ] 69 | } 70 | ], 71 | "policy": { 72 | "timeout": "7.00:00:00", 73 | "retry": 0, 74 | "retryIntervalInSeconds": 30, 75 | "secureOutput": true, 76 | "secureInput": true 77 | }, 78 | "userProperties": [], 79 | "typeProperties": { 80 | "url": "https://login.microsoftonline.com/6771b25a-f4d8-4f9f-9fcc-e7468a5cdc46/oauth2/token", 81 | "method": "POST", 82 | "headers": { 83 | "Content-Type": "application/x-www-form-urlencoded" 84 | }, 85 | "body": { 86 | "value": "@concat('grant_type=client_credentials&client_id=320a7909-de06-4b4c-bd59-5523947e9319&client_secret=', encodeUriComponent(activity('Get Client Secret').output.value), '&resource=https%3A%2F%2Fmanagement.azure.com')", 87 | "type": "Expression" 88 | } 89 | } 90 | } 91 | ], 92 | "parameters": { 93 | "InstanceName": { 94 | "type": "string" 95 | } 96 | }, 97 | "annotations": [] 98 | } 99 | } -------------------------------------------------------------------------------- /dataflow/Cleansing Data Flow.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Cleansing Data Flow", 3 | "properties": { 4 | "type": "MappingDataFlow", 5 | "typeProperties": { 6 | "sources": [ 7 | { 8 | "dataset": { 9 | "referenceName": "DirtyCSVFile", 10 | "type": "DatasetReference" 11 | }, 12 | "name": "DirtyCSVFile" 13 | } 14 | ], 15 | "sinks": [ 16 | { 17 | "dataset": { 18 | "referenceName": "mdwa_sql_database_simple", 19 | "type": "DatasetReference" 20 | }, 21 | "name": "DirtyCSVFileSink" 22 | } 23 | ], 24 | 
"transformations": [ 25 | { 26 | "name": "TrimColumns" 27 | }, 28 | { 29 | "name": "StandardiseNulls" 30 | } 31 | ], 32 | "script": "\n\nsource(output(\n\t\tSalesSystemId as string,\n\t\tSalesPerson as string,\n\t\tSalesAmount as string,\n\t\tProductName as string,\n\t\tProductId as string,\n\t\tCustomerId as string\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false) ~> DirtyCSVFile\nDirtyCSVFile derive(SalesSystemId = trim(SalesSystemId),\n\t\tSalesPerson = trim(SalesPerson),\n\t\tSalesAmount = trim(SalesAmount),\n\t\tProductName = trim(ProductName),\n\t\tProductId = trim(ProductId),\n\t\tCustomerId = trim(CustomerId)) ~> TrimColumns\nTrimColumns derive(SalesSystemId = toInteger(replace(replace(SalesSystemId, ' ',''),'Unknown','')),\n\t\tSalesPerson = replace(replace(SalesPerson, ' ',''),'Unknown',''),\n\t\tSalesAmount = toDecimal(replace(replace(SalesAmount, ' ',''),'Unknown','')),\n\t\tProductName = replace(replace(ProductName, ' ',''),'Unknown',''),\n\t\tProductId = toInteger(replace(replace(ProductId, ' ',''),'Unknown','')),\n\t\tCustomerId = toInteger(replace(replace(CustomerId, ' ',''),'Unknown',''))) ~> StandardiseNulls\nStandardiseNulls sink(input(\n\t\tSalesSystemId as integer,\n\t\tSalesPerson as string,\n\t\tSalesAmount as decimal(10,2),\n\t\tProductName as string,\n\t\tProductId as integer,\n\t\tCustomerId as integer\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tdeletable:false,\n\tinsertable:true,\n\tupdateable:false,\n\tupsertable:false,\n\tformat: 'table') ~> DirtyCSVFileSink" 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /pipeline/Get Metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Get Metadata", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "Get Metadata", 7 | "type": "GetMetadata", 8 | "dependsOn": [], 9 | "policy": { 10 | "timeout": "7.00:00:00", 11 | "retry": 0, 12 | 
"retryIntervalInSeconds": 30, 13 | "secureOutput": false, 14 | "secureInput": false 15 | }, 16 | "userProperties": [], 17 | "typeProperties": { 18 | "dataset": { 19 | "referenceName": "mdwa_datalake_delimited_parameter", 20 | "type": "DatasetReference", 21 | "parameters": { 22 | "FileName": { "value": "@pipeline().parameters.FileName", "type": "Expression" } 23 | } 24 | }, 25 | "fieldList": [ 26 | "columnCount", 27 | "itemName" 28 | ], 29 | "storeSettings": { 30 | "type": "AzureBlobFSReadSettings", 31 | "recursive": true 32 | }, 33 | "formatSettings": { 34 | "type": "DelimitedTextReadSettings" 35 | } 36 | } 37 | }, 38 | { 39 | "name": "Check File Metadata", 40 | "type": "SqlServerStoredProcedure", 41 | "dependsOn": [ 42 | { 43 | "activity": "Get Metadata", 44 | "dependencyConditions": [ 45 | "Succeeded" 46 | ] 47 | } 48 | ], 49 | "policy": { 50 | "timeout": "7.00:00:00", 51 | "retry": 0, 52 | "retryIntervalInSeconds": 30, 53 | "secureOutput": false, 54 | "secureInput": false 55 | }, 56 | "userProperties": [], 57 | "typeProperties": { 58 | "storedProcedureName": "Audit.CheckFileMetadata", 59 | "storedProcedureParameters": { 60 | "ColumnCount": { 61 | "value": { 62 | "value": "@activity('Get Metadata').output.columnCount", 63 | "type": "Expression" 64 | }, 65 | "type": "Int32" 66 | }, 67 | "Filename": { 68 | "value": { 69 | "value": "@activity('Get Metadata').output.itemName", 70 | "type": "Expression" 71 | }, 72 | "type": "String" 73 | } 74 | } 75 | }, 76 | "linkedServiceName": { 77 | "referenceName": "mdwa_control_database", 78 | "type": "LinkedServiceReference" 79 | } 80 | } 81 | ], 82 | "parameters": { 83 | "SchemaName": { 84 | "type": "string", 85 | "defaultValue": "Stage" 86 | }, 87 | "TableName": { 88 | "type": "string", 89 | "defaultValue": "Sales" 90 | }, 91 | "FileName": { 92 | "type": "string", 93 | "defaultValue": "DemoSales.csv" 94 | } 95 | }, 96 | "annotations": [] 97 | } 98 | } -------------------------------------------------------------------------------- /SQL/Control 
Database/Templates/template.Base.CleanData.sql: -------------------------------------------------------------------------------- 1 | -- Template: the %%...%% tokens are placeholders, presumably substituted per entity by the code-generation script (confirm against GenerateScripts.ps1). 2 | DROP PROC IF EXISTS Base.Load%%ENTITY_TABLE_NAME%%_to_Base%%ENTITY_TABLE_NAME%% 3 | GO 4 | -- Copies the Load rows for @LoadId into #tmp and inserts them into Base; if any row has a NULL %%COLUMN_LIST_AS_TRY%% value, every row goes to Bad instead and an error is raised. 5 | CREATE PROC Base.Load%%ENTITY_TABLE_NAME%%_to_Base%%ENTITY_TABLE_NAME%% 6 | ( 7 | @LoadId INTEGER = NULL 8 | ) 9 | -- @LoadId: load to process; an error is raised when Load has no rows for it or Base already holds rows for it. 10 | AS 11 | BEGIN 12 | 13 | DECLARE @ErrorMessage VARCHAR(250) 14 | DECLARE @TotalCount INT = 0 15 | DECLARE @InsertCount INT = 0 16 | DECLARE @RejectCount INT = 0 17 | 18 | DROP TABLE IF EXISTS #tmp 19 | 20 | -- Check data exists in LOAD for LoadID 21 | IF NOT EXISTS 22 | ( 23 | SELECT 1 24 | FROM Load.%%ENTITY_TABLE_NAME%% AS l 25 | WHERE l.LoadID = @LoadID 26 | ) 27 | BEGIN 28 | SELECT @ErrorMessage = CONCAT ('No data found in LOAD for LoadID ', @LoadID) 29 | RAISERROR(@ErrorMessage,16,1) 30 | RETURN 31 | END 32 | 33 | -- Check that data does not exist in BASE already 34 | IF EXISTS 35 | ( 36 | SELECT 1 37 | FROM Base.%%ENTITY_TABLE_NAME%% AS b 38 | WHERE b.LoadID = @LoadID 39 | ) 40 | BEGIN 41 | SELECT @ErrorMessage = CONCAT ('Data already exists in BASE for LoadID ', @LoadID) 42 | RAISERROR(@ErrorMessage,16,1) 43 | RETURN 44 | END 45 | 46 | -- Get the total count from LOAD 47 | SELECT @TotalCount = COUNT(*) FROM Load.%%ENTITY_TABLE_NAME%% WHERE LoadID = @LoadID 48 | 49 | 50 | -- Check for any Bad rows 51 | SELECT 52 | LoadId, 53 | LoadDateTime, 54 | FileName 55 | %%COLUMN_LIST_BASE%% 56 | %%COLUMN_LIST_WITH_TRY_CAST%% 57 | INTO #tmp 58 | FROM Load.%%ENTITY_TABLE_NAME%% 59 | WHERE LoadId = @LoadId 60 | %%SCREEN_SQL%% 61 | 62 | PRINT 'Loaded rows to temp, starting insert' 63 | 64 | -- If there are bad records, put all records into Bad 65 | IF EXISTS 66 | ( 67 | SELECT 1 68 | FROM #tmp AS t 69 | WHERE (%%COLUMN_LIST_AS_TRY%%) IS NULL 70 | ) 71 | BEGIN 72 | INSERT INTO Bad.%%ENTITY_TABLE_NAME%% 73 | ( 74 | LoadId, 75 | LoadDateTime, 76 | FileName 77 | %%COLUMN_LIST_BASE%% 78 | ,ErrorColumn 79 | ) 80 | SELECT 81 | LoadId, 
82 | LoadDateTime, 83 | FileName 84 | %%COLUMN_LIST_BASE%% 85 | %%COLUMN_LIST_AS_CASE%% 86 | FROM #tmp 87 | 88 | -- Get the number of bad rows 89 | SELECT @RejectCount = COUNT(*) FROM #tmp 90 | WHERE (%%COLUMN_LIST_AS_TRY%%) IS NULL 91 | 92 | SELECT @ErrorMessage = CONCAT ('Data not loaded to Base.%%ENTITY_TABLE_NAME%% - See Bad.%%ENTITY_TABLE_NAME%% for LoadID ', @LoadID) 93 | RAISERROR(@ErrorMessage,16,1) 94 | RETURN 95 | END 96 | ELSE 97 | BEGIN 98 | PRINT 'Loaded rows to temp, starting insert' 99 | 100 | 101 | BEGIN TRANSACTION 102 | -- Insert records into BASE 103 | BEGIN TRY 104 | INSERT INTO Base.%%ENTITY_TABLE_NAME%% 105 | ( 106 | LoadID, 107 | LoadDateTime, 108 | FileName 109 | %%COLUMN_LIST_BASE%% 110 | ) 111 | SELECT 112 | t.LoadID, 113 | t.LoadDateTime, 114 | t.FileName 115 | %%SOURCE_COLUMN_LIST_WITH_RULES%% 116 | %%COLUMN_LIST_AS_SCD2_STRING%% 117 | FROM #tmp AS t 118 | -- Capture @@ROWCOUNT immediately: any later statement (including SET) resets it. 119 | SET @InsertCount = @@ROWCOUNT 120 | PRINT CONCAT(@InsertCount, ' records loaded') 121 | END TRY 122 | BEGIN CATCH 123 | IF @@TRANCOUNT > 0 ROLLBACK TRANSACTION; 124 | -- Re-raise the original error: swallowing it here made a failed load 125 | -- look like a successful run with InsertCount = 0. 126 | THROW; 127 | END CATCH 128 | END 129 | -- Only the successful insert path reaches this point with an open transaction. 130 | IF @@TRANCOUNT > 0 COMMIT TRANSACTION 131 | -- Report the row counts and this procedure's name as a single-row result set. 132 | SELECT 133 | @TotalCount AS TotalCount, 134 | @InsertCount AS InsertCount, 135 | @RejectCount AS RejectCount, 136 | OBJECT_NAME(@@PROCID) AS ProcName 137 | 138 | END -------------------------------------------------------------------------------- /SQL/Control Database/Scripts/CreateDatabase.sql: -------------------------------------------------------------------------------- 1 | 2 | DROP TABLE IF EXISTS Metadata.ColumnRule 3 | DROP TABLE IF EXISTS Metadata.RuleDefinition 4 | DROP TABLE IF EXISTS Metadata.EntityColumn 5 | DROP TABLE IF EXISTS Metadata.Entity 6 | DROP TABLE IF EXISTS Metadata.SourceSystem 7 | GO 8 | 9 | DROP PROC IF EXISTS Metadata.ObtainEntityMetadata 10 | DROP PROC IF EXISTS 
Guide.ObtainSampleValues 11 | 12 | DROP SCHEMA IF EXISTS Metadata 13 | GO 14 | 15 | DROP SCHEMA IF EXISTS Guide 16 | GO 17 | 18 | CREATE SCHEMA Metadata 19 | GO 20 | 21 | CREATE SCHEMA Guide 22 | GO 23 | 24 | CREATE TABLE Metadata.SourceSystem 25 | ( 26 | SourceSystemId INT IDENTITY(1,1) NOT NULL 27 | ,SourceSystemName VARCHAR(100) NOT NULL 28 | 29 | ,CONSTRAINT pk_SourceSystem PRIMARY KEY (SourceSystemId) 30 | ) 31 | GO 32 | 33 | CREATE TABLE Metadata.Entity 34 | ( 35 | EntityId INT IDENTITY(1,1) NOT NULL 36 | ,EntityName VARCHAR(100) NOT NULL 37 | ,EntityType VARCHAR(10) NOT NULL 38 | ,SourceSystemId INT NOT NULL 39 | ,EntityObtainString NVARCHAR(500) NULL 40 | ,EntityRootPath NVARCHAR(500) NOT NULL 41 | 42 | ,CONSTRAINT pk_Entity PRIMARY KEY (EntityId) 43 | ,CONSTRAINT fk_Entity_SourceSystem FOREIGN KEY (SourceSystemId) REFERENCES Metadata.SourceSystem (SourceSystemId) 44 | ) 45 | GO 46 | 47 | CREATE TABLE Metadata.EntityColumn 48 | ( 49 | EntityColumnId INT IDENTITY(1,1) NOT NULL 50 | ,EntityColumnName VARCHAR(100) NOT NULL 51 | ,EntityId INT NOT NULL 52 | ,IsPrimaryKey BIT NOT NULL 53 | ,EntitySchemaVersion INT NOT NULL 54 | ,ColumnDataType VARCHAR(100) NOT NULL 55 | 56 | 57 | ,ColumnOrder INT NOT NULL 58 | 59 | 60 | 61 | ,CONSTRAINT pk_EntityColumn PRIMARY KEY (EntityColumnId) 62 | ,CONSTRAINT fk_EntityColumn_Entity FOREIGN KEY (EntityId) REFERENCES Metadata.Entity (EntityId) 63 | ) 64 | GO 65 | 66 | CREATE TABLE Metadata.RuleDefinition 67 | ( 68 | RuleId INT IDENTITY (1,1) NOT NULL 69 | ,RuleCode VARCHAR(25) NOT NULL 70 | ,RuleDefinition NVARCHAR(500) NOT NULL 71 | 72 | ,CONSTRAINT pk_Rule PRIMARY KEY (RuleId) 73 | ) 74 | GO 75 | 76 | CREATE TABLE Metadata.ColumnRule 77 | ( 78 | EntityColumnId INT NOT NULL 79 | ,RuleId INT NOT NULL 80 | ,RuleOrder INT DEFAULT 0 -- the lowest ruleorder will be the outermost rule. 
All others will become nested 81 | 82 | ,CONSTRAINT pk_ColumnRule PRIMARY KEY (RuleId, EntityColumnId) 83 | ,CONSTRAINT fk_ColumnRule_RuleDef FOREIGN KEY (RuleId) REFERENCES Metadata.RuleDefinition (RuleId) 84 | ,CONSTRAINT fk_ColumnRule_EntityColumn FOREIGN KEY (EntityColumnId) REFERENCES Metadata.EntityColumn (EntityColumnId) 85 | ) 86 | GO 87 | 88 | CREATE PROC Metadata.ObtainEntityMetadata 89 | ( 90 | @EntityName VARCHAR(100) = NULL 91 | ) 92 | 93 | AS 94 | BEGIN 95 | 96 | SELECT 97 | e.EntityName 98 | ,e.EntityType 99 | FROM Metadata.Entity AS e 100 | WHERE EntityName = COALESCE(@EntityName, EntityName) 101 | 102 | SELECT 103 | e.EntityName 104 | ,e.EntityType 105 | ,ec.EntityColumnName 106 | ,ec.EntitySchemaVersion 107 | ,ec.ColumnDataType 108 | 109 | 110 | ,ec.ColumnOrder 111 | ,ec.IsPrimaryKey 112 | 113 | FROM Metadata.Entity AS e 114 | INNER JOIN Metadata.EntityColumn AS ec 115 | ON ec.EntityId = e.EntityId 116 | WHERE EntityName = COALESCE(@EntityName, EntityName) 117 | ORDER BY ColumnOrder 118 | 119 | SELECT 120 | ec.EntityColumnName 121 | ,rd.RuleDefinition 122 | ,cr.RuleOrder 123 | FROM Metadata.EntityColumn AS ec 124 | INNER JOIN Metadata.ColumnRule AS cr 125 | ON cr.EntityColumnId = ec.EntityColumnId 126 | INNER JOIN Metadata.RuleDefinition AS rd 127 | ON rd.RuleId = cr.RuleId 128 | ORDER BY ec.EntityColumnId ASC, cr.RuleOrder ASC 129 | 130 | END 131 | GO 132 | 133 | CREATE PROC Guide.ObtainSampleValues 134 | AS 135 | 136 | SELECT 137 | 'dbo' AS SchemaName 138 | ,'Sales' AS TableName 139 | ,'DemoSales.csv' AS FileName 140 | 141 | GO 142 | 143 | INSERT INTO Metadata.SourceSystem 144 | VALUES 145 | ('SalesSystem') 146 | GO 147 | 148 | INSERT INTO Metadata.Entity 149 | VALUES 150 | ('Sales', 'Source', 1, 'SELECT * FROM dbo.Sales', '/SalesSystem/Sales/'), 151 | ('Product', 'Source', 1, NULL, '/SalesSystem/Product/') 152 | GO 153 | 154 | INSERT INTO Metadata.EntityColumn 155 | VALUES 156 | ('SalesSystemId', 1, 1, 0, 'INTEGER', 0), 157 | ('SalesPerson', 1, 
1, 0, 'NVARCHAR(100)',1), 158 | ('SalesAmount', 1, 0, 0, 'DECIMAL(10,2)',2), 159 | ('ProductName', 1, 0, 0, 'NVARCHAR(100)',3), 160 | ('ProductId', 1, 0, 0, 'INTEGER',4), 161 | ('CustomerId',1, 0, 0, 'INTEGER',5) 162 | GO 163 | 164 | INSERT INTO Metadata.RuleDefinition 165 | VALUES 166 | ('CleanString', 'LTRIM(RTRIM(REPLACE(REPLACE(REPLACE([%%COLUMN_NAME%%], char(9),''''),char(10),''''),char(13),'''')))'), 167 | ('SupplyZero', 'COALESCE([%%COLUMN_NAME%%],CAST(0 AS FLOAT))'), 168 | ('StandariseNull','NULLIF(NULLIF([%%COLUMN_NAME%%],''Unknown''),''N/A'')') 169 | GO 170 | 171 | INSERT INTO Metadata.ColumnRule 172 | VALUES 173 | (2,1,0) 174 | ,(3,2,0) 175 | ,(4,1,1) 176 | ,(4,3,0) 177 | ,(5,3,0) 178 | ,(6,3,0) 179 | GO 180 | 181 | SELECT COUNT(*) AS 'Metadata.ColumnRule' FROM Metadata.ColumnRule 182 | SELECT COUNT(*) AS 'Metadata.RuleDefinition' FROM Metadata.RuleDefinition 183 | SELECT COUNT(*) AS 'Metadata.EntityColumn' FROM Metadata.EntityColumn 184 | SELECT COUNT(*) AS 'Metadata.Entity' FROM Metadata.Entity 185 | SELECT COUNT(*) AS 'Metadata.SourceSystem' FROM Metadata.SourceSystem 186 | 187 | 188 | EXEC Guide.ObtainSampleValues 189 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | x64/ 21 | x86/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | [Ll]og/ 26 | 27 | # Visual Studio 2015/2017 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # Visual Studio 2017 auto generated files 33 | Generated\ Files/ 34 | 35 | # MSTest test Results 36 | [Tt]est[Rr]esult*/ 37 | [Bb]uild[Ll]og.* 38 | 39 | # NUNIT 40 | *.VisualState.xml 41 | TestResult.xml 42 | 43 | # Build Results of an ATL Project 44 | [Dd]ebugPS/ 45 | [Rr]eleasePS/ 46 | dlldata.c 47 | 48 | # Benchmark Results 49 | BenchmarkDotNet.Artifacts/ 50 | 51 | # .NET Core 52 | project.lock.json 53 | project.fragment.lock.json 54 | artifacts/ 55 | **/Properties/launchSettings.json 56 | 57 | # StyleCop 58 | StyleCopReport.xml 59 | 60 | # Files built by Visual Studio 61 | *_i.c 62 | *_p.c 63 | *_i.h 64 | *.ilk 65 | *.meta 66 | *.obj 67 | *.iobj 68 | *.pch 69 | *.pdb 70 | *.ipdb 71 | *.pgc 72 | *.pgd 73 | *.rsp 74 | *.sbr 75 | *.tlb 76 | *.tli 77 | *.tlh 78 | *.tmp 79 | *.tmp_proj 80 | *.log 81 | *.vspscc 82 | *.vssscc 83 | .builds 84 | *.pidb 85 | *.svclog 86 | *.scc 87 | 88 | # Chutzpah Test files 89 | _Chutzpah* 90 | 91 | # Visual C++ cache files 92 | ipch/ 93 | *.aps 94 | *.ncb 95 | *.opendb 96 | *.opensdf 97 | *.sdf 98 | *.cachefile 99 | *.VC.db 100 | *.VC.VC.opendb 101 | 102 | # Visual Studio profiler 103 | *.psess 104 | *.vsp 105 | *.vspx 106 | *.sap 107 | 108 | # Visual Studio Trace Files 109 | *.e2e 110 | 111 | # TFS 2012 Local Workspace 112 | $tf/ 113 | 114 | # Guidance Automation Toolkit 115 | *.gpState 116 | 117 | # ReSharper is a .NET coding 
add-in 118 | _ReSharper*/ 119 | *.[Rr]e[Ss]harper 120 | *.DotSettings.user 121 | 122 | # JustCode is a .NET coding add-in 123 | .JustCode 124 | 125 | # TeamCity is a build add-in 126 | _TeamCity* 127 | 128 | # DotCover is a Code Coverage Tool 129 | *.dotCover 130 | 131 | # AxoCover is a Code Coverage Tool 132 | .axoCover/* 133 | !.axoCover/settings.json 134 | 135 | # Visual Studio code coverage results 136 | *.coverage 137 | *.coveragexml 138 | 139 | # NCrunch 140 | _NCrunch_* 141 | .*crunch*.local.xml 142 | nCrunchTemp_* 143 | 144 | # MightyMoose 145 | *.mm.* 146 | AutoTest.Net/ 147 | 148 | # Web workbench (sass) 149 | .sass-cache/ 150 | 151 | # Installshield output folder 152 | [Ee]xpress/ 153 | 154 | # DocProject is a documentation generator add-in 155 | DocProject/buildhelp/ 156 | DocProject/Help/*.HxT 157 | DocProject/Help/*.HxC 158 | DocProject/Help/*.hhc 159 | DocProject/Help/*.hhk 160 | DocProject/Help/*.hhp 161 | DocProject/Help/Html2 162 | DocProject/Help/html 163 | 164 | # Click-Once directory 165 | publish/ 166 | 167 | # Publish Web Output 168 | *.[Pp]ublish.xml 169 | *.azurePubxml 170 | # Note: Comment the next line if you want to checkin your web deploy settings, 171 | # but database connection strings (with potential passwords) will be unencrypted 172 | *.pubxml 173 | *.publishproj 174 | 175 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 176 | # checkin your Azure Web App publish settings, but sensitive information contained 177 | # in these scripts will be unencrypted 178 | PublishScripts/ 179 | 180 | # NuGet Packages 181 | *.nupkg 182 | # The packages folder can be ignored because of Package Restore 183 | **/[Pp]ackages/* 184 | # except build/, which is used as an MSBuild target. 
185 | !**/[Pp]ackages/build/ 186 | # Uncomment if necessary however generally it will be regenerated when needed 187 | #!**/[Pp]ackages/repositories.config 188 | # NuGet v3's project.json files produces more ignorable files 189 | *.nuget.props 190 | *.nuget.targets 191 | 192 | # Microsoft Azure Build Output 193 | csx/ 194 | *.build.csdef 195 | 196 | # Microsoft Azure Emulator 197 | ecf/ 198 | rcf/ 199 | 200 | # Windows Store app package directories and files 201 | AppPackages/ 202 | BundleArtifacts/ 203 | Package.StoreAssociation.xml 204 | _pkginfo.txt 205 | *.appx 206 | 207 | # Visual Studio cache files 208 | # files ending in .cache can be ignored 209 | *.[Cc]ache 210 | # but keep track of directories ending in .cache 211 | !*.[Cc]ache/ 212 | 213 | # Others 214 | ClientBin/ 215 | ~$* 216 | *~ 217 | *.dbmdl 218 | *.dbproj.schemaview 219 | *.jfm 220 | *.pfx 221 | *.publishsettings 222 | orleans.codegen.cs 223 | 224 | # Including strong name files can present a security risk 225 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 226 | #*.snk 227 | 228 | # Since there are multiple workflows, uncomment next line to ignore bower_components 229 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 230 | #bower_components/ 231 | 232 | # RIA/Silverlight projects 233 | Generated_Code/ 234 | 235 | # Backup & report files from converting an old project file 236 | # to a newer Visual Studio version. 
Backup files are not needed, 237 | # because we have git ;-) 238 | _UpgradeReport_Files/ 239 | Backup*/ 240 | UpgradeLog*.XML 241 | UpgradeLog*.htm 242 | ServiceFabricBackup/ 243 | *.rptproj.bak 244 | 245 | # SQL Server files 246 | *.mdf 247 | *.ldf 248 | *.ndf 249 | 250 | # Business Intelligence projects 251 | *.rdl.data 252 | *.bim.layout 253 | *.bim_*.settings 254 | *.rptproj.rsuser 255 | 256 | # Microsoft Fakes 257 | FakesAssemblies/ 258 | 259 | # GhostDoc plugin setting file 260 | *.GhostDoc.xml 261 | 262 | # Node.js Tools for Visual Studio 263 | .ntvs_analysis.dat 264 | node_modules/ 265 | 266 | # Visual Studio 6 build log 267 | *.plg 268 | 269 | # Visual Studio 6 workspace options file 270 | *.opt 271 | 272 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 273 | *.vbw 274 | 275 | # Visual Studio LightSwitch build output 276 | **/*.HTMLClient/GeneratedArtifacts 277 | **/*.DesktopClient/GeneratedArtifacts 278 | **/*.DesktopClient/ModelManifest.xml 279 | **/*.Server/GeneratedArtifacts 280 | **/*.Server/ModelManifest.xml 281 | _Pvt_Extensions 282 | 283 | # Paket dependency manager 284 | .paket/paket.exe 285 | paket-files/ 286 | 287 | # FAKE - F# Make 288 | .fake/ 289 | 290 | # JetBrains Rider 291 | .idea/ 292 | *.sln.iml 293 | 294 | # CodeRush 295 | .cr/ 296 | 297 | # Python Tools for Visual Studio (PTVS) 298 | __pycache__/ 299 | *.pyc 300 | 301 | # Cake - Uncomment if you are using it 302 | # tools/** 303 | # !tools/packages.config 304 | 305 | # Tabs Studio 306 | *.tss 307 | 308 | # Telerik's JustMock configuration file 309 | *.jmconfig 310 | 311 | # BizTalk build output 312 | *.btp.cs 313 | *.btm.cs 314 | *.odx.cs 315 | *.xsd.cs 316 | 317 | # OpenCover UI analysis results 318 | OpenCover/ 319 | 320 | # Azure Stream Analytics local run output 321 | ASALocalRun/ 322 | 323 | # MSBuild Binary and Structured Log 324 | *.binlog 325 | 326 | # NVidia Nsight GPU debugger configuration file 327 | *.nvuser 328 | 329 | # MFractors 
(Xamarin productivity tool) working folder 330 | .mfractor/ 331 | -------------------------------------------------------------------------------- /PowerShell/Code Generation/GenerateScripts.ps1: --------------------------------------------------------------------------------

########################################
#
# Author : Matt How
#
# Desc   : Generate SQL scripts using
#          metadata from SQL database
#
# Usage  : Should be run with
#          PowerShell(64)
#
########################################

Clear-Host

function Expand-Placeholder
{
    <#
    .SYNOPSIS
        Replaces every occurrence of a placeholder token with a literal value.
    .PARAMETER Text
        One or more lines of text containing placeholder tokens.
    .PARAMETER Placeholder
        The token to look for, e.g. %%ENTITY_TABLE_NAME%%. It is matched as literal
        text (case-insensitively), never as a regular expression.
    .PARAMETER Value
        The text to insert. $null is treated as an empty string.
    .OUTPUTS
        The input text with the token substituted.
    #>
    param
    (
        [string[]] $Text,
        [string]   $Placeholder,
        [string]   $Value
    )

    # In a -replace substitution string '$' introduces a group reference;
    # '$$' emits a literal '$', so generated SQL is inserted exactly as built.
    $LiteralValue = $Value.Replace('$', '$$')

    $Text -replace [regex]::Escape($Placeholder), $LiteralValue
}

# Define variable values
$TemplateRepo = ""
$AzureSQLDatabaseServer = ""
$AzureSQLDatabaseName = ""
$AzureSQLDatabaseAdminUserName = ""

# Prompt without echoing the password to the console; the plain text is only
# materialised because the connection string below requires it.
$SecurePassword = Read-Host "Enter password for $($AzureSQLDatabaseAdminUserName)" -AsSecureString
$AzureSQLDatabaseAdminPassword = (New-Object System.Net.NetworkCredential('', $SecurePassword)).Password
$Query = "EXEC Metadata.ObtainEntityMetadata"

# Create a connection string for the control database
$ControlDBConnection = "Server='$AzureSQLDatabaseServer';Database='$AzureSQLDatabaseName';User ID='$AzureSQLDatabaseAdminUserName';Password='$AzureSQLDatabaseAdminPassword';"

# The templates to generate from; each is read from $TemplateRepo
$TemplateArray = @(
    "template.Base.CleanData.sql",
    "template.Base.Table.sql",
    "template.Load.ImportData.sql",
    "template.Load.Table.sql",
    "template.Load.TableType.sql"
)

# Obtain the metadata dataset: table 0 = entities, 1 = columns, 2 = column rules
$Metadata = Invoke-Sqlcmd -ConnectionString $ControlDBConnection -Query $Query -OutputAs DataSet

$EntityDataset = $Metadata.Tables[0].Rows
$ColumnDataset = $Metadata.Tables[1].Rows
$RuleDataset = $Metadata.Tables[2].Rows

# Iterate each entity
ForEach($Item in $EntityDataset)
{

    $Entity = $Item.EntityTableName

    # NOTE(review): Metadata.ObtainEntityMetadata as defined in
    # SQL/Control Database/Scripts/CreateDatabase.sql returns EntityName, not
    # EntityTableName (nor SourceSystemName, SCD2Columns, screenSQL, ColumnName,
    # DataType, isMapped, isSCDType2) - confirm which version of the procedure
    # this script targets. Without an entity name every output file would be
    # called "_<template>", so such rows are skipped instead.
    If( [string]::IsNullOrWhiteSpace("$Entity") )
    {
        Write-Warning "Skipping a metadata row that has no EntityTableName value"
        continue
    }

    $SourceSystemName = $Item.SourceSystemName
    $isSCD = If($Item.SCD2Columns -gt 0) {$true} Else {$False}

    # String interpolation turns both $null and DBNull into an empty string
    $ScreenSQL = "$($Item.screenSQL)"

    $ColumnListWithRules = ""
    $ColumnListWithCast = ""
    $ColumnListAsDetyped = ""
    $ColumnListWithDataTypes = ""
    $ColumnList = ""
    $ColumnListAsString = ""
    $ColumnListWithTryCast = ""
    $ColumnListBase = ""
    $ColumnListAsCase = ",CASE "
    $EntityConstraintsArray = @()
    $SCD2ColumnArray = @()
    $TryColumnArray = @()


    If( $isSCD )
    {
        $SCD2HashString = ",HASHBYTES('SHA2_256',CAST(CONCAT(%%COLUMN_LIST_AS_SCD2_STRING%%) AS VARBINARY(64)) )"
        $EntityHashColumnName = $Entity + "HashSCD2"
    }
    Else
    {
        $SCD2HashString = ""
        $EntityHashColumnName = ""
    }

    # Create the required column lists
    ForEach($ColumnObject in $ColumnDataset | Where-Object { $_.EntityTableName -eq $Entity })
    {

        # Define the column variables
        $ColumnName = $ColumnObject.ColumnName
        $ColumnId = $ColumnObject.EntityColumnId
        $ColumnSCD = If($ColumnObject.isSCDType2) {$True} Else {$False}
        $DataType = $ColumnObject.DataType
        $ColumnDefinition = ""

        # Create a column list with datatypes
        $ColumnValueWithCast = $(",CAST([$ColumnName] AS $DataType)")
        $ColumnListWithCast += "`t`t`t" + $ColumnValueWithCast + "`n"

        # Create a column list in a detyped DDL format
        $ColumnValueDetyped = ",[" + $ColumnName + "] NVARCHAR(1000)"
        $ColumnListAsDetyped += $ColumnValueDetyped + "`n"

        # Create plain column lists (one per line, and as a single string)
        $ColumnValue = ",[" + $ColumnName + "]"
        $ColumnList += $ColumnValue + "`n"
        $ColumnListAsString += $ColumnValue

        # Create column lists for mapped columns
        If ($ColumnObject.isMapped)
        {
            # Create a column list in a typed DDL format
            $ColumnValueWithDataTypes = ",[" + $ColumnName + "] " + $DataType
            $ColumnListWithDataTypes += $ColumnValueWithDataTypes + "`n"

            # Create a column list as case statement
            $ColumnValueAsCase = "WHEN [try_$ColumnName] IS NULL THEN '$ColumnName'"
            $ColumnListAsCase += $ColumnValueAsCase + "`n"

            # Collected here, joined with '+' once all columns are known
            $TryColumnArray += "[try_$ColumnName]"

            $ColumnListBase += $ColumnValue + "`n"

            # Rules are matched by EntityColumnId. If the column row carries no id the
            # comparison would be $null -eq $null and attach every rule to this column,
            # so no rules are applied in that case.
            $ColumnRules = $RuleDataset | Where-Object { $null -ne $ColumnId -and $_.EntityColumnId -eq $ColumnId }

            # Rules are applied in the order the procedure returns them (by RuleOrder)
            ForEach($Rule in $ColumnRules)
            {
                # Prepare the rules for the column
                $RuleString = $Rule.RuleDefinition

                If($Rule.RuleOrder -eq 0 -or $ColumnDefinition -eq "")
                {
                    # First rule in the chain: inject the column name. The empty check
                    # covers a chain that does not start at RuleOrder 0, which would
                    # otherwise wrap an empty expression.
                    $ColumnDefinition = Expand-Placeholder -Text $RuleString -Placeholder '%%COLUMN_NAME%%' -Value $ColumnName
                }
                Else
                {
                    # Later rule: wrap it around the expression built so far
                    $ColumnDefinition = Expand-Placeholder -Text $RuleString -Placeholder '[%%COLUMN_NAME%%]' -Value $ColumnDefinition
                }
            }

            # No rules: use the bare column, bracketed like every other reference so
            # that names containing spaces or reserved words stay valid
            If( $ColumnDefinition -eq "") {$ColumnDefinition = "[" + $ColumnName + "]"}

            $ColumnListWithRules += "`t`t`t," + $ColumnDefinition + " AS [$($ColumnName)]`n"

            # Create a column list with try_casts and rules
            $ColumnValueWithTryCast = ",CASE WHEN TRY_CAST(" + $ColumnDefinition + " AS $DataType) IS NULL THEN NULL ELSE 1 END AS [try_$($ColumnName)]"
            $ColumnListWithTryCast += $ColumnValueWithTryCast + "`n"

            If( $ColumnSCD )
            {
                $SCD2ColumnArray += "[" + $ColumnName + "]"
            }
        }

        # Create array of primary key columns
        If($ColumnObject.isPrimaryKey -eq $true) { $EntityConstraintsArray += $ColumnName }
    }

    $PrimaryKeyColumns = $EntityConstraintsArray -join ', '
    $ColumnListAsCase += " END AS [ErrorColumn] "

    # [try_A]+[try_B]+...
    $ColumnListAsTry = $TryColumnArray -join '+'

    # [A],'||',[B],'||',... with no trailing separator
    $ColumnListAsSCD2String = $SCD2ColumnArray -join ",'||',"

    # NOTE(review): template.Base.Table.sql places %%COLUMN_LIST_WITH_DATATYPES%% after a
    # column line that has no trailing comma, so removing the leading comma here looks
    # like it yields invalid DDL for that template - confirm against the other templates
    # that use this placeholder before changing it.
    If ($ColumnListWithDataTypes) {$ColumnListWithDataTypes = $ColumnListWithDataTypes.TrimStart(',')}

    If( $isSCD )
    {
        $SCD2HashString = Expand-Placeholder -Text $SCD2HashString -Placeholder "%%COLUMN_LIST_AS_SCD2_STRING%%" -Value $ColumnListAsSCD2String
    }

    # Placeholder -> value, applied to every template in this order
    $Replacements = [ordered]@{
        "%%ENTITY_TABLE_NAME%%"             = $Entity
        "%%SCREEN_SQL%%"                    = $ScreenSQL
        "%%SOURCE_SYSTEM%%"                 = $SourceSystemName
        "%%SOURCE_COLUMN_LIST_WITH_RULES%%" = $ColumnListWithRules
        "%%SOURCE_COLUMN_LIST%%"            = $ColumnListWithCast
        "%%COLUMN_LIST%%"                   = $ColumnList
        "%%COLUMN_LIST_BASE%%"              = $ColumnListBase
        "%%COLUMN_LIST_AS_DETYPED%%"        = $ColumnListAsDetyped.TrimStart(',')
        "%%COLUMN_LIST_WITH_DATATYPES%%"    = $ColumnListWithDataTypes
        "%%COLUMN_LIST_WITH_TRY_CAST%%"     = $ColumnListWithTryCast
        "%%COLUMN_LIST_AS_STRING%%"         = $ColumnListAsString.TrimStart(',')
        "%%COLUMN_LIST_AS_SCD2_STRING%%"    = $SCD2HashString
        "%%COLUMN_LIST_AS_TRY%%"            = $ColumnListAsTry
        "%%COLUMN_LIST_AS_CASE%%"           = $ColumnListAsCase
        "%%SCD_2_HASH%%"                    = $EntityHashColumnName
    }

    ForEach($Template in $TemplateArray)
    {
        # Assign the template output
        $TemplateOutput = Get-Content -Path $($TemplateRepo + '\' + $Template)

        # Replace placeholders with values
        ForEach($Placeholder in $Replacements.Keys)
        {
            $TemplateOutput = Expand-Placeholder -Text $TemplateOutput -Placeholder $Placeholder -Value $Replacements[$Placeholder]
        }

        # <Entity>_<template name without its leading "template." prefix>
        $FileName = $Entity + "_" + $($Template -replace '^template\.', '')

        $OutputPath = $TemplateRepo + "\Complete\$FileName"
        New-Item -Path $OutputPath -ItemType File -Force | Out-Null
        $TemplateOutput | Out-File $OutputPath

        Write-Host "Completed generation of $Template"
    }
}
--------------------------------------------------------------------------------