├── .gitignore ├── Docs ├── SampleStoredProcedures │ ├── ADF_DEMO │ │ ├── LOGS │ │ │ ├── SP_END_PIPELINE_EXECUTION.sql │ │ │ └── SP_START_PIPELINE_EXECUTION.sql │ │ └── TRIPPIN │ │ │ ├── SP_LOAD_PEOPLE_STAGE.sql │ │ │ └── SP_MERGE_PEOPLE.sql │ └── MyDatabase │ │ └── MySchema │ │ └── MyStoredProcedure.sql ├── Screenshots │ ├── adf-pipeline-overview.png │ ├── connector-overview.png │ └── connector-sequence-diagram.png ├── SnowflakeConnectorAdfArmTemplate.json └── SnowflakeDbSetup.sql ├── LICENSE ├── README.md ├── SnowflakeAdfConnector.csproj ├── SnowflakeConnectorAdf.cs └── host.json /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # Mac specific files to ignore 5 | .DS_Store 6 | 7 | # Azure Functions localsettings file 8 | local.settings.json 9 | 10 | # User-specific files 11 | *.suo 12 | *.user 13 | *.userosscache 14 | *.sln.docstates 15 | 16 | # User-specific files (MonoDevelop/Xamarin Studio) 17 | *.userprefs 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | bld/ 27 | [Bb]in/ 28 | [Oo]bj/ 29 | [Ll]og/ 30 | 31 | # VS Code user settings 32 | .vscode/ 33 | 34 | # Visual Studio 2015 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # MSTest test Results 40 | [Tt]est[Rr]esult*/ 41 | [Bb]uild[Ll]og.* 42 | 43 | # NUNIT 44 | *.VisualState.xml 45 | TestResult.xml 46 | 47 | # Build Results of an ATL Project 48 | [Dd]ebugPS/ 49 | [Rr]eleasePS/ 50 | dlldata.c 51 | 52 | # DNX 53 | project.lock.json 54 | project.fragment.lock.json 55 | artifacts/ 56 | 57 | *_i.c 58 | *_p.c 59 | *_i.h 60 | *.ilk 61 | *.meta 62 | *.obj 63 | *.pch 64 | *.pdb 65 | *.pgc 66 | *.pgd 67 | *.rsp 68 | *.sbr 69 | *.tlb 70 | *.tli 71 | *.tlh 72 | *.tmp 73 | *.tmp_proj 74 | *.log 75 | *.vspscc 76 | *.vssscc 77 | .builds 78 | *.pidb 79 | *.svclog 80 | *.scc 81 | 82 | # Chutzpah Test files 83 | _Chutzpah* 84 | 85 | # Visual C++ cache files 86 | ipch/ 87 | *.aps 88 | *.ncb 89 | *.opendb 90 | *.opensdf 91 | *.sdf 92 | *.cachefile 93 | *.VC.db 94 | *.VC.VC.opendb 95 | 96 | # Visual Studio profiler 97 | *.psess 98 | *.vsp 99 | *.vspx 100 | *.sap 101 | 102 | # TFS 2012 Local Workspace 103 | $tf/ 104 | 105 | # Guidance Automation Toolkit 106 | *.gpState 107 | 108 | # ReSharper is a .NET coding add-in 109 | _ReSharper*/ 110 | *.[Rr]e[Ss]harper 111 | *.DotSettings.user 112 | 113 | # JustCode is a .NET coding add-in 114 | .JustCode 115 | 116 | # TeamCity is a build add-in 117 | _TeamCity* 118 | 119 | # DotCover is a Code Coverage Tool 120 | *.dotCover 121 | 122 | # NCrunch 123 | _NCrunch_* 124 | .*crunch*.local.xml 125 | nCrunchTemp_* 126 | 127 | # MightyMoose 128 | *.mm.* 129 | AutoTest.Net/ 130 | 131 | # Web workbench (sass) 132 | .sass-cache/ 133 | 134 | # Installshield output folder 135 | [Ee]xpress/ 136 | 137 | # DocProject is a documentation generator add-in 138 | DocProject/buildhelp/ 139 | DocProject/Help/*.HxT 140 | DocProject/Help/*.HxC 141 | DocProject/Help/*.hhc 142 | DocProject/Help/*.hhk 143 | DocProject/Help/*.hhp 144 | DocProject/Help/Html2 145 | DocProject/Help/html 146 | 147 | # Click-Once directory 148 | publish/ 149 | 150 | # Publish Web Output 151 | *.[Pp]ublish.xml 152 | *.azurePubxml 153 | # TODO: Comment the next line if you want to checkin your web deploy settings 154 | # but 
database connection strings (with potential passwords) will be unencrypted 155 | #*.pubxml 156 | *.publishproj 157 | 158 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 159 | # checkin your Azure Web App publish settings, but sensitive information contained 160 | # in these scripts will be unencrypted 161 | PublishScripts/ 162 | 163 | # NuGet Packages 164 | *.nupkg 165 | # The packages folder can be ignored because of Package Restore 166 | **/packages/* 167 | # except build/, which is used as an MSBuild target. 168 | !**/packages/build/ 169 | # Uncomment if necessary however generally it will be regenerated when needed 170 | #!**/packages/repositories.config 171 | # NuGet v3's project.json files produces more ignoreable files 172 | *.nuget.props 173 | *.nuget.targets 174 | 175 | # Microsoft Azure Build Output 176 | csx/ 177 | *.build.csdef 178 | 179 | # Microsoft Azure Emulator 180 | ecf/ 181 | rcf/ 182 | 183 | # Windows Store app package directories and files 184 | AppPackages/ 185 | BundleArtifacts/ 186 | Package.StoreAssociation.xml 187 | _pkginfo.txt 188 | 189 | # Visual Studio cache files 190 | # files ending in .cache can be ignored 191 | *.[Cc]ache 192 | # but keep track of directories ending in .cache 193 | !*.[Cc]ache/ 194 | 195 | # Others 196 | ClientBin/ 197 | ~$* 198 | *~ 199 | *.dbmdl 200 | *.dbproj.schemaview 201 | *.jfm 202 | *.pfx 203 | *.publishsettings 204 | node_modules/ 205 | orleans.codegen.cs 206 | 207 | # Since there are multiple workflows, uncomment next line to ignore bower_components 208 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 209 | #bower_components/ 210 | 211 | # RIA/Silverlight projects 212 | Generated_Code/ 213 | 214 | # Backup & report files from converting an old project file 215 | # to a newer Visual Studio version. 
Backup files are not needed, 216 | # because we have git ;-) 217 | _UpgradeReport_Files/ 218 | Backup*/ 219 | UpgradeLog*.XML 220 | UpgradeLog*.htm 221 | 222 | # SQL Server files 223 | *.mdf 224 | *.ldf 225 | 226 | # Business Intelligence projects 227 | *.rdl.data 228 | *.bim.layout 229 | *.bim_*.settings 230 | 231 | # Microsoft Fakes 232 | FakesAssemblies/ 233 | 234 | # GhostDoc plugin setting file 235 | *.GhostDoc.xml 236 | 237 | # Node.js Tools for Visual Studio 238 | .ntvs_analysis.dat 239 | 240 | # Visual Studio 6 build log 241 | *.plg 242 | 243 | # Visual Studio 6 workspace options file 244 | *.opt 245 | 246 | # Visual Studio LightSwitch build output 247 | **/*.HTMLClient/GeneratedArtifacts 248 | **/*.DesktopClient/GeneratedArtifacts 249 | **/*.DesktopClient/ModelManifest.xml 250 | **/*.Server/GeneratedArtifacts 251 | **/*.Server/ModelManifest.xml 252 | _Pvt_Extensions 253 | 254 | # Paket dependency manager 255 | .paket/paket.exe 256 | paket-files/ 257 | 258 | # FAKE - F# Make 259 | .fake/ 260 | 261 | # JetBrains Rider 262 | .idea/ 263 | *.sln.iml 264 | 265 | # CodeRush 266 | .cr/ 267 | 268 | # Python Tools for Visual Studio (PTVS) 269 | __pycache__/ 270 | *.pyc -------------------------------------------------------------------------------- /Docs/SampleStoredProcedures/ADF_DEMO/LOGS/SP_END_PIPELINE_EXECUTION.sql: -------------------------------------------------------------------------------- 1 | UPDATE LOGS.ADF_PIPELINE_EXECUTION_LOG 2 | SET 3 | EXECUTION_STATUS = 'Success' 4 | ,EXECUTION_END_AT = TO_TIMESTAMP_NTZ(CURRENT_TIMESTAMP()) 5 | ,ROWS_LOADED = $ROWS_LOADED 6 | ,ROWS_INSERTED = $ROWS_INSERTED 7 | ,ROWS_UPDATED = $ROWS_UPDATED 8 | ,ROWS_DELETED = $ROWS_DELETED 9 | ,META_UPDATED_AT = TO_TIMESTAMP_NTZ(CURRENT_TIMESTAMP()) 10 | WHERE 1 = 1 11 | AND ADF_PIPELINE_RUN_ID = $ADF_PIPELINE_RUN_ID 12 | ; 13 | -------------------------------------------------------------------------------- /Docs/SampleStoredProcedures/ADF_DEMO/LOGS/SP_START_PIPELINE_EXECUTION.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO LOGS.ADF_PIPELINE_EXECUTION_LOG 2 | ( 3 | ADF_PIPELINE_RUN_ID 4 | ,ADF_PIPELINE_NAME 5 | ,ADF_PIPELINE_TRIGGER_ID 6 | ,ADF_PIPELINE_TRIGGER_NAME 7 | ,ADF_PIPELINE_TRIGGER_TYPE 8 | ,ADF_PIPELINE_TRIGGER_TIME 9 | ,ADF_DATA_FACTORY_NAME 10 | ,EXECUTION_START_AT 11 | ,EXECUTION_STATUS 12 | ,META_CREATED_AT 13 | ,META_UPDATED_AT 14 | ) 15 | VALUES 16 | ( 17 | $ADF_PIPELINE_RUN_ID 18 | ,$ADF_PIPELINE_NAME 19 | ,$ADF_PIPELINE_TRIGGER_ID 20 | ,$ADF_PIPELINE_TRIGGER_NAME 21 | ,$ADF_PIPELINE_TRIGGER_TYPE 22 | ,TO_TIMESTAMP_NTZ($ADF_PIPELINE_TRIGGER_TIME) 23 | ,$ADF_DATA_FACTORY_NAME 24 | ,TO_TIMESTAMP_NTZ(CURRENT_TIMESTAMP()) 25 | ,'Running' 26 | ,TO_TIMESTAMP_NTZ(CURRENT_TIMESTAMP()) 27 | ,TO_TIMESTAMP_NTZ(CURRENT_TIMESTAMP()) 28 | ); 29 | -------------------------------------------------------------------------------- /Docs/SampleStoredProcedures/ADF_DEMO/TRIPPIN/SP_LOAD_PEOPLE_STAGE.sql: -------------------------------------------------------------------------------- 1 | TRUNCATE TABLE TRIPPIN.PEOPLE_STAGE; 2 | 3 | CALL TRIPPIN.SP_COPY_INTO_PEOPLE_STAGE($FILE_NAME); 4 | 5 | SELECT $1:rows_loaded AS ROWS_LOADED FROM TABLE(RESULT_SCAN(LAST_QUERY_ID())); 6 | -------------------------------------------------------------------------------- /Docs/SampleStoredProcedures/ADF_DEMO/TRIPPIN/SP_MERGE_PEOPLE.sql: -------------------------------------------------------------------------------- 1 | MERGE INTO TRIPPIN.PEOPLE T 2 | USING 
TRIPPIN.PEOPLE_STAGE S 3 | ON 4 | ( 5 | T.USERNAME = S.USERNAME 6 | ) 7 | WHEN NOT MATCHED THEN INSERT 8 | ( 9 | USERNAME 10 | ,FIRSTNAME 11 | ,LASTNAME 12 | ,MIDDLENAME 13 | ,AGE 14 | ,META_CREATED_AT 15 | ,META_UPDATED_AT 16 | ) 17 | VALUES 18 | ( 19 | S.USERNAME 20 | ,S.FIRSTNAME 21 | ,S.LASTNAME 22 | ,S.MIDDLENAME 23 | ,S.AGE 24 | ,TO_TIMESTAMP_NTZ(CURRENT_TIMESTAMP()) 25 | ,TO_TIMESTAMP_NTZ(CURRENT_TIMESTAMP()) 26 | ) 27 | WHEN MATCHED THEN UPDATE 28 | SET 29 | T.FIRSTNAME = S.FIRSTNAME 30 | ,T.LASTNAME = S.LASTNAME 31 | ,T.MIDDLENAME = S.MIDDLENAME 32 | ,T.AGE = S.AGE 33 | ,T.META_UPDATED_AT = TO_TIMESTAMP_NTZ(CURRENT_TIMESTAMP()) 34 | ; 35 | 36 | SELECT "number of rows inserted" AS ROWS_INSERTED, "number of rows updated" AS ROWS_UPDATED FROM TABLE(RESULT_SCAN(LAST_QUERY_ID())); 37 | -------------------------------------------------------------------------------- /Docs/SampleStoredProcedures/MyDatabase/MySchema/MyStoredProcedure.sql: -------------------------------------------------------------------------------- 1 | SELECT $FIRST_NAME AS FIRST_NAME, 'Doe' AS LAST_NAME, 1 AS AGE; 2 | 3 | SELECT CONCAT($FIRST_NAME, 'Bar') AS OUTPUT_1, $AGE + 100 AS OUTPUT_2; 4 | -------------------------------------------------------------------------------- /Docs/Screenshots/adf-pipeline-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremiahhansen/snowflake-connector-adf/f072ef1fadf81bd24c9bfe944e195278c4ba518e/Docs/Screenshots/adf-pipeline-overview.png -------------------------------------------------------------------------------- /Docs/Screenshots/connector-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremiahhansen/snowflake-connector-adf/f072ef1fadf81bd24c9bfe944e195278c4ba518e/Docs/Screenshots/connector-overview.png -------------------------------------------------------------------------------- /Docs/Screenshots/connector-sequence-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremiahhansen/snowflake-connector-adf/f072ef1fadf81bd24c9bfe944e195278c4ba518e/Docs/Screenshots/connector-sequence-diagram.png -------------------------------------------------------------------------------- /Docs/SnowflakeConnectorAdfArmTemplate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "resourceNamePrefix": { 6 | "type": "string" 7 | }, 8 | "keyVaultOwnerObjectId": { 9 | "type": "string" 10 | }, 11 | "snowflakeConnectionString": { 12 | "type": "securestring" 13 | } 14 | }, 15 | "variables": { 16 | "keyVaultName": "[concat(parameters('resourceNamePrefix'), 'kv')]", 17 | "storageAccountName": "[concat(parameters('resourceNamePrefix'), 'sa')]", 18 | "appServicePlanName": "[concat(parameters('resourceNamePrefix'), 'asp')]", 19 | "functionAppName": "[concat(parameters('resourceNamePrefix'), 'fa')]", 20 | "dataFactoryName": "[concat(parameters('resourceNamePrefix'), 'adf')]", 21 | "dataFactoryId": "[concat('Microsoft.DataFactory/factories/', variables('dataFactoryName'))]", 22 | "storageAccountContainerNameSprocs": "storedprocedures", 23 | "storageAccountContainerNameStage": "blobstage" 24 | }, 25 | "resources": [ 26 | { 27 | "type": "Microsoft.KeyVault/vaults", 28 | 
"apiVersion": "2016-10-01", 29 | "name": "[variables('keyVaultName')]", 30 | "location": "[resourceGroup().location]", 31 | "properties": { 32 | "sku": { 33 | "family": "A", 34 | "name": "Standard" 35 | }, 36 | "tenantId": "[subscription().tenantId]", 37 | "accessPolicies": [ 38 | { 39 | "tenantId": "[subscription().tenantId]", 40 | "objectId": "[parameters('keyVaultOwnerObjectId')]", 41 | "permissions": { 42 | "keys": [], 43 | "secrets": [ 44 | "Get", 45 | "List", 46 | "Set" 47 | ], 48 | "certificates": [] 49 | } 50 | } 51 | ], 52 | "enabledForDeployment": false, 53 | "enabledForDiskEncryption": false, 54 | "enabledForTemplateDeployment": false 55 | } 56 | }, 57 | { 58 | "type": "Microsoft.KeyVault/vaults/secrets", 59 | "apiVersion": "2018-02-14", 60 | "name": "[concat(variables('keyVaultName'), '/snowflakeConnectionString')]", 61 | "location": "[resourceGroup().location]", 62 | "properties": { 63 | "value": "[parameters('snowflakeConnectionString')]" 64 | }, 65 | "dependsOn": [ 66 | "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]" 67 | ] 68 | }, 69 | { 70 | "type": "Microsoft.Storage/storageAccounts", 71 | "apiVersion": "2019-04-01", 72 | "name": "[variables('storageAccountName')]", 73 | "location": "[resourceGroup().location]", 74 | "sku": { 75 | "name": "Standard_LRS", 76 | "tier": "Standard" 77 | }, 78 | "kind": "StorageV2", 79 | "properties": { 80 | "networkAcls": { 81 | "bypass": "AzureServices", 82 | "virtualNetworkRules": [], 83 | "ipRules": [], 84 | "defaultAction": "Allow" 85 | }, 86 | "supportsHttpsTrafficOnly": true, 87 | "encryption": { 88 | "services": { 89 | "file": { 90 | "enabled": true 91 | }, 92 | "blob": { 93 | "enabled": true 94 | } 95 | }, 96 | "keySource": "Microsoft.Storage" 97 | }, 98 | "accessTier": "Hot" 99 | } 100 | }, 101 | { 102 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 103 | "apiVersion": "2019-04-01", 104 | "name": "[concat(variables('storageAccountName'), '/default/', variables('storageAccountContainerNameSprocs'))]", 105 | "properties": { 106 | "publicAccess": "None" 107 | }, 108 | "dependsOn": [ 109 | "[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]" 110 | ] 111 | }, 112 | { 113 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 114 | "apiVersion": "2019-04-01", 115 | "name": "[concat(variables('storageAccountName'), '/default/', variables('storageAccountContainerNameStage'))]", 116 | "properties": { 117 | "publicAccess": "None" 118 | }, 119 | "dependsOn": [ 120 | "[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]" 121 | ] 122 | }, 123 | { 124 | "type": "Microsoft.Web/serverfarms", 125 | "apiVersion": "2016-09-01", 126 | "name": "[variables('appServicePlanName')]", 127 | "location": "[resourceGroup().location]", 128 | "sku": { 129 | "name": "Y1", 130 | "tier": "Dynamic", 131 | "size": "Y1", 132 | "family": "Y", 133 | "capacity": 0 134 | }, 135 | "kind": "functionapp", 136 | "properties": { 137 | "name": "[variables('appServicePlanName')]", 138 | "perSiteScaling": false, 139 | "reserved": false, 140 | "targetWorkerCount": 0, 141 | "targetWorkerSizeId": 0 142 | } 143 | }, 144 | { 145 | "type": "Microsoft.Web/sites", 146 | "apiVersion": "2016-08-01", 147 | "name": "[variables('functionAppName')]", 148 | "location": "[resourceGroup().location]", 149 | "kind": "functionapp", 150 | "identity": { 151 | "type": "SystemAssigned" 152 | }, 153 | "properties": { 154 | "name": "[variables('functionAppName')]", 155 | 
"clientAffinityEnabled": false, 156 | "reserved": false, 157 | "siteConfig": { 158 | "appSettings": [ 159 | { 160 | "name": "AzureWebJobsStorage", 161 | "value": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageAccountName'),';AccountKey=',listKeys(resourceId('Microsoft.Storage/storageAccounts', variables('StorageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value)]" 162 | }, 163 | { 164 | "name": "FUNCTIONS_EXTENSION_VERSION", 165 | "value": "~3" 166 | }, 167 | { 168 | "name": "FUNCTIONS_WORKER_RUNTIME", 169 | "value": "dotnet" 170 | }, 171 | { 172 | "name": "WEBSITE_CONTENTAZUREFILECONNECTIONSTRING", 173 | "value": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageAccountName'),';AccountKey=',listKeys(resourceId('Microsoft.Storage/storageAccounts', variables('StorageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value)]" 174 | }, 175 | { 176 | "name": "WEBSITE_CONTENTSHARE", 177 | "value": "[toLower(variables('functionAppName'))]" 178 | }, 179 | { 180 | "name": "WEBSITE_NODE_DEFAULT_VERSION", 181 | "value": "10.14.1" 182 | }, 183 | { 184 | "name": "storageAccountConnectionString", 185 | "value": "[concat('@Microsoft.KeyVault(SecretUri=https://',variables('keyVaultName'),'.vault.azure.net/secrets/storageAccountConnectionString/VERSION)')]" 186 | }, 187 | { 188 | "name": "storageAccountContainerName", 189 | "value": "[variables('storageAccountContainerNameSprocs')]" 190 | }, 191 | { 192 | "name": "snowflakeConnectionString", 193 | "value": "[concat('@Microsoft.KeyVault(SecretUri=https://',variables('keyVaultName'),'.vault.azure.net/secrets/snowflakeConnectionString/VERSION)')]" 194 | } 195 | ] 196 | } 197 | }, 198 | "dependsOn": [ 199 | "[resourceId('Microsoft.Web/serverfarms', variables('appServicePlanName'))]" 200 | ] 201 | }, 202 | { 203 | "type": "Microsoft.KeyVault/vaults/secrets", 204 | "apiVersion": "2018-02-14", 205 | "name": "[concat(variables('keyVaultName'), '/storageAccountConnectionString')]", 206 | "location": "[resourceGroup().location]", 207 | "properties": { 208 | "value": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageAccountName'),';AccountKey=',listKeys(resourceId('Microsoft.Storage/storageAccounts', variables('StorageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value)]" 209 | }, 210 | "dependsOn": [ 211 | "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]", 212 | "[variables('storageAccountName')]" 213 | ] 214 | }, 215 | { 216 | "type": "Microsoft.KeyVault/vaults/secrets", 217 | "apiVersion": "2018-02-14", 218 | "name": "[concat(variables('keyVaultName'), '/functionAppHostKey')]", 219 | "location": "[resourceGroup().location]", 220 | "properties": { 221 | "value": "[listkeys(concat(resourceGroup().id,'/providers/Microsoft.Web/sites/', variables('functionAppName'), '/host/default/'),'2016-08-01').functionKeys.default]" 222 | }, 223 | "dependsOn": [ 224 | "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]", 225 | "[variables('functionAppName')]" 226 | ] 227 | }, 228 | { 229 | "type": "Microsoft.DataFactory/factories", 230 | "apiVersion": "2018-06-01", 231 | "name": "[variables('dataFactoryName')]", 232 | "location": "[resourceGroup().location]", 233 | "identity": { 234 | "type": "SystemAssigned" 235 | }, 236 | "properties": {} 237 | }, 238 | { 239 | "type": "Microsoft.DataFactory/factories/linkedServices", 240 | "apiVersion": 
"2018-06-01", 241 | "name": "[concat(variables('dataFactoryName'), '/KeyVault_LS')]", 242 | "properties": { 243 | "annotations": [], 244 | "type": "AzureKeyVault", 245 | "typeProperties": { 246 | "baseUrl": "[concat('https://', variables('keyVaultName'), '.vault.azure.net/')]" 247 | } 248 | }, 249 | "dependsOn": [ 250 | "[variables('dataFactoryName')]" 251 | ] 252 | }, 253 | { 254 | "type": "Microsoft.DataFactory/factories/linkedServices", 255 | "apiVersion": "2018-06-01", 256 | "name": "[concat(variables('dataFactoryName'), '/FunctionApp_LS')]", 257 | "properties": { 258 | "annotations": [], 259 | "type": "AzureFunction", 260 | "typeProperties": { 261 | "functionAppUrl": "[concat('https://', variables('functionAppName'), '.azurewebsites.net')]", 262 | "functionKey": { 263 | "type": "AzureKeyVaultSecret", 264 | "store": { 265 | "referenceName": "KeyVault_LS", 266 | "type": "LinkedServiceReference" 267 | }, 268 | "secretName": "functionAppHostKey" 269 | } 270 | } 271 | }, 272 | "dependsOn": [ 273 | "[concat(variables('dataFactoryId'), '/linkedServices/KeyVault_LS')]" 274 | ] 275 | }, 276 | { 277 | "type": "Microsoft.DataFactory/factories/linkedServices", 278 | "apiVersion": "2018-06-01", 279 | "name": "[concat(variables('dataFactoryName'), '/BlobStage_LS')]", 280 | "properties": { 281 | "annotations": [], 282 | "type": "AzureBlobStorage", 283 | "typeProperties": { 284 | "connectionString": { 285 | "type": "AzureKeyVaultSecret", 286 | "store": { 287 | "referenceName": "KeyVault_LS", 288 | "type": "LinkedServiceReference" 289 | }, 290 | "secretName": "storageAccountConnectionString" 291 | } 292 | } 293 | }, 294 | "dependsOn": [ 295 | "[concat(variables('dataFactoryId'), '/linkedServices/KeyVault_LS')]" 296 | ] 297 | }, 298 | { 299 | "type": "Microsoft.DataFactory/factories/linkedServices", 300 | "apiVersion": "2018-06-01", 301 | "name": "[concat(variables('dataFactoryName'), '/TripPin_LS')]", 302 | "properties": { 303 | "annotations": [], 304 | "type": "OData", 305 | "typeProperties": { 306 | "url": "https://services.odata.org/TripPinRESTierService", 307 | "authenticationType": "Anonymous" 308 | } 309 | }, 310 | "dependsOn": [ 311 | "[variables('dataFactoryName')]" 312 | ] 313 | }, 314 | { 315 | "type": "Microsoft.DataFactory/factories/datasets", 316 | "apiVersion": "2018-06-01", 317 | "name": "[concat(variables('dataFactoryName'), '/DelimitedText_DS')]", 318 | "properties": { 319 | "linkedServiceName": { 320 | "referenceName": "BlobStage_LS", 321 | "type": "LinkedServiceReference" 322 | }, 323 | "parameters": { 324 | "fileName": { 325 | "type": "string" 326 | } 327 | }, 328 | "annotations": [], 329 | "type": "DelimitedText", 330 | "typeProperties": { 331 | "location": { 332 | "type": "AzureBlobStorageLocation", 333 | "fileName": { 334 | "value": "@dataset().fileName", 335 | "type": "Expression" 336 | }, 337 | "container": "[variables('storageAccountContainerNameStage')]" 338 | }, 339 | "columnDelimiter": ",", 340 | "rowDelimiter": "\n", 341 | "escapeChar": "\\", 342 | "firstRowAsHeader": true, 343 | "quoteChar": "\"" 344 | }, 345 | "schema": [] 346 | }, 347 | "dependsOn": [ 348 | "[concat(variables('dataFactoryId'), '/linkedServices/BlobStage_LS')]" 349 | ] 350 | }, 351 | { 352 | "type": "Microsoft.DataFactory/factories/datasets", 353 | "apiVersion": "2018-06-01", 354 | "name": "[concat(variables('dataFactoryName'), '/TripPin_DS')]", 355 | "properties": { 356 | "linkedServiceName": { 357 | "referenceName": "TripPin_LS", 358 | "type": "LinkedServiceReference" 359 | }, 360 | "annotations": [], 
361 | "type": "ODataResource", 362 | "schema": [], 363 | "typeProperties": { 364 | "path": "People" 365 | } 366 | }, 367 | "dependsOn": [ 368 | "[concat(variables('dataFactoryId'), '/linkedServices/TripPin_LS')]" 369 | ] 370 | }, 371 | { 372 | "type": "Microsoft.DataFactory/factories/pipelines", 373 | "apiVersion": "2018-06-01", 374 | "name": "[concat(variables('dataFactoryName'), '/SampleSnowflakePipeline_P')]", 375 | "properties": { 376 | "activities": [ 377 | { 378 | "name": "StoredProcedure1", 379 | "type": "AzureFunctionActivity", 380 | "dependsOn": [], 381 | "policy": { 382 | "timeout": "7.00:00:00", 383 | "retry": 0, 384 | "retryIntervalInSeconds": 30, 385 | "secureOutput": false, 386 | "secureInput": false 387 | }, 388 | "userProperties": [], 389 | "typeProperties": { 390 | "functionName": "SnowflakeConnectorAdf", 391 | "method": "POST", 392 | "headers": {}, 393 | "body": { 394 | "value": "{\n \"databaseName\": \"MyDatabase\",\n \"schemaName\": \"MySchema\",\n \"storedProcedureName\": \"MyStoredProcedure\",\n \"parameters\": [\n {\n \"name\": \"FIRST_NAME\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().parameters.FIRST_NAME}\"\n },\n {\n \"name\": \"AGE\",\n \"type\": \"NUMBER\",\n \"value\": @{pipeline().parameters.AGE}\n }\n ]\n}", 395 | "type": "Expression" 396 | } 397 | }, 398 | "linkedServiceName": { 399 | "referenceName": "FunctionApp_LS", 400 | "type": "LinkedServiceReference" 401 | } 402 | }, 403 | { 404 | "name": "StoredProcedure2", 405 | "type": "AzureFunctionActivity", 406 | "dependsOn": [ 407 | { 408 | "activity": "StoredProcedure1", 409 | "dependencyConditions": [ 410 | "Succeeded" 411 | ] 412 | } 413 | ], 414 | "policy": { 415 | "timeout": "7.00:00:00", 416 | "retry": 0, 417 | "retryIntervalInSeconds": 30, 418 | "secureOutput": false, 419 | "secureInput": false 420 | }, 421 | "userProperties": [], 422 | "typeProperties": { 423 | "functionName": "SnowflakeConnectorAdf", 424 | "method": "POST", 425 | "headers": {}, 426 | "body": { 427 | "value": "{\n \"databaseName\": \"MyDatabase\",\n \"schemaName\": \"MySchema\",\n \"storedProcedureName\": \"MyStoredProcedure\",\n \"parameters\": [\n {\n \"name\": \"FIRST_NAME\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{activity('StoredProcedure1').output.OUTPUT_1}\"\n },\n {\n \"name\": \"AGE\",\n \"type\": \"NUMBER\",\n \"value\": @{activity('StoredProcedure1').output.OUTPUT_2}\n }\n ]\n}", 428 | "type": "Expression" 429 | } 430 | }, 431 | "linkedServiceName": { 432 | "referenceName": "FunctionApp_LS", 433 | "type": "LinkedServiceReference" 434 | } 435 | } 436 | ], 437 | "parameters": { 438 | "FIRST_NAME": { 439 | "type": "string", 440 | "defaultValue": "Foo" 441 | }, 442 | "AGE": { 443 | "type": "int", 444 | "defaultValue": 10 445 | } 446 | }, 447 | "annotations": [] 448 | }, 449 | "dependsOn": [ 450 | "[concat(variables('dataFactoryId'), '/linkedServices/FunctionApp_LS')]" 451 | ] 452 | }, 453 | { 454 | "type": "Microsoft.DataFactory/factories/pipelines", 455 | "apiVersion": "2018-06-01", 456 | "name": "[concat(variables('dataFactoryName'), '/DataIngestion_P')]", 457 | "properties": { 458 | "activities": [ 459 | { 460 | "name": "StartPipelineExecutionLog_AF", 461 | "type": "AzureFunctionActivity", 462 | "dependsOn": [], 463 | "policy": { 464 | "timeout": "7.00:00:00", 465 | "retry": 0, 466 | "retryIntervalInSeconds": 30, 467 | "secureOutput": false, 468 | "secureInput": false 469 | }, 470 | "userProperties": [], 471 | "typeProperties": { 472 | "functionName": "SnowflakeConnectorAdf", 473 | "method": "POST", 474 | 
"headers": {}, 475 | "body": { 476 | "value": "{\n \"databaseName\": \"@{pipeline().parameters.databaseName}\",\n \"schemaName\": \"LOGS\",\n \"storedProcedureName\": \"SP_START_PIPELINE_EXECUTION\",\n \"parameters\": [\n {\n \"name\": \"ADF_PIPELINE_RUN_ID\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().RunId}\"\n },\n {\n \"name\": \"ADF_PIPELINE_NAME\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().Pipeline}\"\n },\n {\n \"name\": \"ADF_PIPELINE_TRIGGER_ID\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().TriggerId}\"\n },\n {\n \"name\": \"ADF_PIPELINE_TRIGGER_NAME\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().TriggerName}\"\n },\n {\n \"name\": \"ADF_PIPELINE_TRIGGER_TYPE\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().TriggerType}\"\n },\n {\n \"name\": \"ADF_PIPELINE_TRIGGER_TIME\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().TriggerTime}\"\n },\n {\n \"name\": \"ADF_DATA_FACTORY_NAME\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().DataFactory}\"\n }\n ]\n}", 477 | "type": "Expression" 478 | } 479 | }, 480 | "linkedServiceName": { 481 | "referenceName": "FunctionApp_LS", 482 | "type": "LinkedServiceReference" 483 | } 484 | }, 485 | { 486 | "name": "SetFileName_SV", 487 | "type": "SetVariable", 488 | "dependsOn": [ 489 | { 490 | "activity": "StartPipelineExecutionLog_AF", 491 | "dependencyConditions": [ 492 | "Succeeded" 493 | ] 494 | } 495 | ], 496 | "userProperties": [], 497 | "typeProperties": { 498 | "variableName": "fileName", 499 | "value": { 500 | "value": "@concat(pipeline().parameters.tableName, '_', formatDateTime(pipeline().TriggerTime,'yyyyMMddHHmmss'), '.txt')", 501 | "type": "Expression" 502 | } 503 | } 504 | }, 505 | { 506 | "name": "CopySourceToBlob_CD", 507 | "type": "Copy", 508 | "dependsOn": [ 509 | { 510 | "activity": "SetFileName_SV", 511 | "dependencyConditions": [ 512 | "Succeeded" 513 | ] 514 | } 515 | ], 516 | "policy": { 517 | "timeout": "7.00:00:00", 518 | "retry": 0, 519 | "retryIntervalInSeconds": 30, 520 | "secureOutput": false, 521 | "secureInput": false 522 | }, 523 | "userProperties": [], 524 | "typeProperties": { 525 | "source": { 526 | "type": "ODataSource" 527 | }, 528 | "sink": { 529 | "type": "DelimitedTextSink", 530 | "storeSettings": { 531 | "type": "AzureBlobStorageWriteSettings" 532 | }, 533 | "formatSettings": { 534 | "type": "DelimitedTextWriteSettings", 535 | "quoteAllText": true, 536 | "fileExtension": ".txt" 537 | } 538 | }, 539 | "enableStaging": false 540 | }, 541 | "inputs": [ 542 | { 543 | "referenceName": "TripPin_DS", 544 | "type": "DatasetReference", 545 | "parameters": {} 546 | } 547 | ], 548 | "outputs": [ 549 | { 550 | "referenceName": "DelimitedText_DS", 551 | "type": "DatasetReference", 552 | "parameters": { 553 | "fileName": { 554 | "value": "@variables('fileName')", 555 | "type": "Expression" 556 | } 557 | } 558 | } 559 | ] 560 | }, 561 | { 562 | "name": "CopyBlobToStage_AF", 563 | "type": "AzureFunctionActivity", 564 | "dependsOn": [ 565 | { 566 | "activity": "CopySourceToBlob_CD", 567 | "dependencyConditions": [ 568 | "Succeeded" 569 | ] 570 | } 571 | ], 572 | "policy": { 573 | "timeout": "7.00:00:00", 574 | "retry": 0, 575 | "retryIntervalInSeconds": 30, 576 | "secureOutput": false, 577 | "secureInput": false 578 | }, 579 | "userProperties": [], 580 | "typeProperties": { 581 | "functionName": "SnowflakeConnectorAdf", 582 | "method": "POST", 583 | "headers": {}, 584 | "body": { 585 | "value": "{\n \"databaseName\": \"@{pipeline().parameters.databaseName}\",\n 
\"schemaName\": \"@{pipeline().parameters.schemaName}\",\n \"storedProcedureName\": \"SP_LOAD_@{pipeline().parameters.tableName}_STAGE\",\n \"parameters\": [\n {\n \"name\": \"FILE_NAME\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{variables('fileName')}\"\n }\n ]\n}", 586 | "type": "Expression" 587 | } 588 | }, 589 | "linkedServiceName": { 590 | "referenceName": "FunctionApp_LS", 591 | "type": "LinkedServiceReference" 592 | } 593 | }, 594 | { 595 | "name": "MergeStageToTarget_AF", 596 | "type": "AzureFunctionActivity", 597 | "dependsOn": [ 598 | { 599 | "activity": "CopyBlobToStage_AF", 600 | "dependencyConditions": [ 601 | "Succeeded" 602 | ] 603 | } 604 | ], 605 | "policy": { 606 | "timeout": "7.00:00:00", 607 | "retry": 0, 608 | "retryIntervalInSeconds": 30, 609 | "secureOutput": false, 610 | "secureInput": false 611 | }, 612 | "userProperties": [], 613 | "typeProperties": { 614 | "functionName": "SnowflakeConnectorAdf", 615 | "method": "POST", 616 | "headers": {}, 617 | "body": { 618 | "value": "{\n \"databaseName\": \"@{pipeline().parameters.databaseName}\",\n \"schemaName\": \"@{pipeline().parameters.schemaName}\",\n \"storedProcedureName\": \"SP_MERGE_@{pipeline().parameters.tableName}\",\n \"parameters\": [\n {\n \"name\": \"PIPELINE_RUN_ID\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().RunId}\"\n }\n ]\n}", 619 | "type": "Expression" 620 | } 621 | }, 622 | "linkedServiceName": { 623 | "referenceName": "FunctionApp_LS", 624 | "type": "LinkedServiceReference" 625 | } 626 | }, 627 | { 628 | "name": "EndPipelineExecutionLog_AF", 629 | "type": "AzureFunctionActivity", 630 | "dependsOn": [ 631 | { 632 | "activity": "MergeStageToTarget_AF", 633 | "dependencyConditions": [ 634 | "Succeeded" 635 | ] 636 | } 637 | ], 638 | "policy": { 639 | "timeout": "7.00:00:00", 640 | "retry": 0, 641 | "retryIntervalInSeconds": 30, 642 | "secureOutput": false, 643 | "secureInput": false 644 | }, 645 | "userProperties": [], 646 | "typeProperties": { 647 | "functionName": "SnowflakeConnectorAdf", 648 | "method": "POST", 649 | "headers": {}, 650 | "body": { 651 | "value": "{\n \"databaseName\": \"@{pipeline().parameters.databaseName}\",\n \"schemaName\": \"LOGS\",\n \"storedProcedureName\": \"SP_END_PIPELINE_EXECUTION\",\n \"parameters\": [\n {\n \"name\": \"ADF_PIPELINE_RUN_ID\",\n \"type\": \"VARCHAR\",\n \"value\": \"@{pipeline().RunId}\"\n },\n {\n \"name\": \"ROWS_LOADED\",\n \"type\": \"NUMBER\",\n \"value\": \"@{activity('CopyBlobToStage_AF').output.ROWS_LOADED}\"\n },\n {\n \"name\": \"ROWS_INSERTED\",\n \"type\": \"NUMBER\",\n \"value\": \"@{activity('MergeStageToTarget_AF').output.ROWS_INSERTED}\"\n },\n {\n \"name\": \"ROWS_UPDATED\",\n \"type\": \"NUMBER\",\n \"value\": \"@{activity('MergeStageToTarget_AF').output.ROWS_UPDATED}\"\n },\n {\n \"name\": \"ROWS_DELETED\",\n \"type\": \"NUMBER\",\n \"value\": 0\n }\n ]\n}", 652 | "type": "Expression" 653 | } 654 | }, 655 | "linkedServiceName": { 656 | "referenceName": "FunctionApp_LS", 657 | "type": "LinkedServiceReference" 658 | } 659 | } 660 | ], 661 | "parameters": { 662 | "databaseName": { 663 | "type": "string", 664 | "defaultValue": "ADF_DEMO" 665 | }, 666 | "schemaName": { 667 | "type": "string", 668 | "defaultValue": "TRIPPIN" 669 | }, 670 | "tableName": { 671 | "type": "string", 672 | "defaultValue": "PEOPLE" 673 | } 674 | }, 675 | "variables": { 676 | "fileName": { 677 | "type": "String" 678 | } 679 | }, 680 | "annotations": [] 681 | }, 682 | "dependsOn": [ 683 | "[concat(variables('dataFactoryId'), '/datasets/TripPin_DS')]", 684 | 
"[concat(variables('dataFactoryId'), '/datasets/DelimitedText_DS')]", 685 | "[concat(variables('dataFactoryId'), '/linkedServices/FunctionApp_LS')]" 686 | ] 687 | } 688 | ] 689 | } -------------------------------------------------------------------------------- /Docs/SnowflakeDbSetup.sql: -------------------------------------------------------------------------------- 1 | -- This script will setup the necessary Snowflake objects to run the sample ADF 2 | -- pipelines in this project. 3 | 4 | -- ---------------------------------------------------------------------------- 5 | -- Script setup 6 | -- ---------------------------------------------------------------------------- 7 | -- Step 1: Update the URL and AZURE_SAS_TOKEN properties for the ADF_BLOB_STAGE below 8 | 9 | -- Step 2: Update the PASSWORD property for the ADF_DEMO_USER user below 10 | 11 | 12 | -- ---------------------------------------------------------------------------- 13 | -- Create the database and related objects 14 | -- ---------------------------------------------------------------------------- 15 | USE ROLE SYSADMIN; 16 | 17 | CREATE OR REPLACE DATABASE ADF_DEMO 18 | COMMENT = 'Database to demo the Snowflake Azure Data Factory (ADF) Connector (see https://github.com/jeremiahhansen/snowflake-connector-adf)'; 19 | USE DATABASE ADF_DEMO; 20 | 21 | CREATE OR REPLACE SCHEMA TRIPPIN 22 | COMMENT = 'Schema to store data from the OData TripPin sample service (see https://www.odata.org/odata-services/)'; 23 | 24 | CREATE OR REPLACE SCHEMA LOGS 25 | COMMENT = 'Schema to store logs from the Snowflake Azure Data Factory (ADF) Connector'; 26 | 27 | CREATE OR REPLACE FILE FORMAT TRIPPIN.ADF_DATASET_FORMAT 28 | TYPE = 'CSV' 29 | COMPRESSION = 'AUTO' 30 | RECORD_DELIMITER = '\n' 31 | FIELD_DELIMITER = ',' 32 | SKIP_HEADER = 1 33 | FIELD_OPTIONALLY_ENCLOSED_BY = '\042' -- double quotes 34 | ; 35 | 36 | CREATE OR REPLACE STAGE TRIPPIN.ADF_BLOB_STAGE 37 | URL = 'azure://.blob.core.windows.net/blobstage' 38 | CREDENTIALS = (AZURE_SAS_TOKEN = '*****') 39 | ; 40 | --LIST @ADF_DEMO.TRIPPIN.ADF_BLOB_STAGE; 41 | 42 | CREATE OR REPLACE TABLE TRIPPIN.PEOPLE_STAGE 43 | ( 44 | UserName VARCHAR NOT NULL 45 | ,FirstName VARCHAR NOT NULL 46 | ,LastName VARCHAR NOT NULL 47 | ,MiddleName VARCHAR NULL 48 | ,Age NUMBER NULL 49 | ); 50 | 51 | CREATE OR REPLACE TABLE TRIPPIN.PEOPLE 52 | ( 53 | UserName VARCHAR NOT NULL 54 | ,FirstName VARCHAR NOT NULL 55 | ,LastName VARCHAR NOT NULL 56 | ,MiddleName VARCHAR NULL 57 | ,Age NUMBER NULL 58 | ,META_CREATED_AT TIMESTAMP_NTZ NOT NULL 59 | ,META_UPDATED_AT TIMESTAMP_NTZ NOT NULL 60 | ); 61 | 62 | CREATE OR REPLACE TABLE LOGS.ADF_PIPELINE_EXECUTION_LOG 63 | ( 64 | ADF_PIPELINE_RUN_ID VARCHAR NOT NULL 65 | ,ADF_PIPELINE_NAME VARCHAR NOT NULL 66 | ,ADF_PIPELINE_TRIGGER_ID VARCHAR NOT NULL 67 | ,ADF_PIPELINE_TRIGGER_NAME VARCHAR NOT NULL 68 | ,ADF_PIPELINE_TRIGGER_TYPE VARCHAR NOT NULL 69 | ,ADF_PIPELINE_TRIGGER_TIME TIMESTAMP_NTZ NOT NULL 70 | ,ADF_DATA_FACTORY_NAME VARCHAR NOT NULL 71 | ,EXECUTION_START_AT TIMESTAMP_NTZ NOT NULL 72 | ,EXECUTION_END_AT TIMESTAMP_NTZ NULL 73 | ,EXECUTION_STATUS VARCHAR NOT NULL 74 | ,ROWS_LOADED NUMBER NULL 75 | ,ROWS_INSERTED NUMBER NULL 76 | ,ROWS_UPDATED NUMBER NULL 77 | ,ROWS_DELETED NUMBER NULL 78 | ,META_CREATED_AT TIMESTAMP_NTZ NOT NULL 79 | ,META_UPDATED_AT TIMESTAMP_NTZ NOT NULL 80 | ); 81 | 82 | CREATE OR REPLACE PROCEDURE TRIPPIN.SP_COPY_INTO_PEOPLE_STAGE(FILE_NAME VARCHAR) 83 | RETURNS VARIANT 84 | LANGUAGE javascript 85 | AS 86 | $$ 87 | var validFileNameRegex = 
/^[\/\\A-Za-z0-9._-]+$/; 88 | if (!validFileNameRegex.test(FILE_NAME)) { 89 | throw "Possible SQL Injection attack with file '" + FILE_NAME + "'"; 90 | } 91 | 92 | var sqlCommand = "COPY INTO TRIPPIN.PEOPLE_STAGE \ 93 | FROM @TRIPPIN.ADF_BLOB_STAGE \ 94 | FILES = ('" + FILE_NAME + "') \ 95 | FILE_FORMAT = (FORMAT_NAME = TRIPPIN.ADF_DATASET_FORMAT) \ 96 | FORCE = TRUE"; 97 | var resultSet = snowflake.execute({sqlText: sqlCommand}); 98 | 99 | // Now get the previous DML result as a JSON object 100 | sqlCommand = "SELECT OBJECT_CONSTRUCT(*) FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))"; 101 | resultSet = snowflake.execute({sqlText: sqlCommand}); 102 | 103 | // There should only be one row and one column returned 104 | resultSet.next(); 105 | return resultSet.getColumnValue(1); 106 | $$; 107 | 108 | 109 | -- ---------------------------------------------------------------------------- 110 | -- Create the virtual warehouse 111 | -- ---------------------------------------------------------------------------- 112 | CREATE OR REPLACE WAREHOUSE ADF_DEMO_WH WITH 113 | WAREHOUSE_SIZE = 'XSMALL' 114 | WAREHOUSE_TYPE = 'STANDARD' 115 | AUTO_SUSPEND = 300 116 | AUTO_RESUME = TRUE 117 | MIN_CLUSTER_COUNT = 1 118 | MAX_CLUSTER_COUNT = 2 119 | SCALING_POLICY = 'STANDARD' 120 | ; 121 | 122 | 123 | -- ---------------------------------------------------------------------------- 124 | -- Create the role, user and grant privileges 125 | -- ---------------------------------------------------------------------------- 126 | USE ROLE SECURITYADMIN; 127 | 128 | CREATE OR REPLACE ROLE ADF_DEMO_ROLE; 129 | GRANT ROLE ADF_DEMO_ROLE TO ROLE SYSADMIN; 130 | GRANT OWNERSHIP ON DATABASE ADF_DEMO TO ROLE ADF_DEMO_ROLE; 131 | GRANT OWNERSHIP ON SCHEMA ADF_DEMO.TRIPPIN TO ROLE ADF_DEMO_ROLE; 132 | GRANT OWNERSHIP ON SCHEMA ADF_DEMO.LOGS TO ROLE ADF_DEMO_ROLE; 133 | GRANT OWNERSHIP ON FILE FORMAT ADF_DEMO.TRIPPIN.ADF_DATASET_FORMAT TO ROLE ADF_DEMO_ROLE; 134 | GRANT OWNERSHIP ON STAGE ADF_DEMO.TRIPPIN.ADF_BLOB_STAGE TO ROLE ADF_DEMO_ROLE; 135 | GRANT OWNERSHIP ON TABLE ADF_DEMO.TRIPPIN.PEOPLE_STAGE TO ROLE ADF_DEMO_ROLE; 136 | GRANT OWNERSHIP ON TABLE ADF_DEMO.TRIPPIN.PEOPLE TO ROLE ADF_DEMO_ROLE; 137 | GRANT OWNERSHIP ON TABLE ADF_DEMO.LOGS.ADF_PIPELINE_EXECUTION_LOG TO ROLE ADF_DEMO_ROLE; 138 | GRANT OWNERSHIP ON PROCEDURE ADF_DEMO.TRIPPIN.SP_COPY_INTO_PEOPLE_STAGE(VARCHAR) TO ROLE ADF_DEMO_ROLE; 139 | GRANT USAGE, MONITOR ON WAREHOUSE ADF_DEMO_WH TO ROLE ADF_DEMO_ROLE; 140 | --SHOW GRANTS TO ROLE ADF_DEMO_ROLE; 141 | --SHOW GRANTS TO ROLE SYSADMIN; 142 | 143 | CREATE USER IF NOT EXISTS ADF_DEMO_USER 144 | PASSWORD = '*****' 145 | DEFAULT_ROLE = ADF_DEMO_ROLE 146 | DEFAULT_WAREHOUSE = 'ADF_DEMO_WH' 147 | DEFAULT_NAMESPACE = 'ADF_DEMO' 148 | MUST_CHANGE_PASSWORD = FALSE 149 | ; 150 | GRANT ROLE ADF_DEMO_ROLE TO USER ADF_DEMO_USER; 151 | --SHOW GRANTS TO USER ADF_DEMO_USER; 152 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Snowflake Connector for Azure Data Factory (ADF) 2 | This connector is an Azure Function which allows Azure Data Factory (ADF) to connect to Snowflake in a flexible way. It provides SQL-based stored procedure functionality with dynamic parameters and return values. Used with ADF, you can build complete end-to-end data warehouse solutions for Snowflake while following Microsoft and Azure best practices around portability and security. 3 | 4 | To get started, please follow the steps outlined in the [Prerequisites](#Prerequisites) and [Deployment](#deployment) sections below. 5 | 6 | As of January 2021, ADF provides a native Snowflake connector. However, it is still not possible to run an arbitrary SQL query (let alone call a stored procedure) against Snowflake. When that functionality is available, much of this connector will be obsolete. 7 | 8 | _**CAUTION**_: This connector is limited to a maximum runtime of 230 seconds for each script executed. This limitation is not a result of this specific function's code, but rather of any HTTP-triggered Azure Function (which is ultimately due to the default idle timeout of Azure Load Balancer). See [Function app timeout duration](https://docs.microsoft.com/en-us/azure/azure-functions/functions-scale#timeout) for more details. If you need to run a query against Snowflake that takes more than 230 seconds, you need to consider a different option. Microsoft recommends following [the Durable Functions async pattern](https://docs.microsoft.com/en-us/azure/azure-functions/durable/durable-functions-overview#async-http) to get around this limitation. Fortunately, [dg-hub](https://github.com/dg-hub) has forked this project and updated it to make use of the Durable Functions async pattern. Please check out the [snowflake-durable-connector-adf project](https://github.com/dg-hub/snowflake-durable-connector-adf) for the details. 9 | 10 | ## Table of Contents 11 | 1. [Connector Overview](#connector-overview) 12 | 1. [High Level Overview](#high-level-overview) 13 | 1. [Parameters](#parameters) 14 | 1. [Multiple Queries](#multiple-queries) 15 | 1. [Return Values](#return-values) 16 | 1. [Script Storage](#script-storage) 17 | 1. [Integration with ADF](#integration-with-adf) 18 | 1. [ADF Overview](#adf-overview) 19 | 1. [ADF Expressions](#adf-expressions) 20 | 1. [Prerequisites](#prerequisites) 21 | 1. [Deployment](#deployment) 22 | 1. [Legal](#legal) 23 | 24 | ## Connector Overview 25 | ### High Level Overview 26 | At a really high level, stored procedures provide the ability to pass parameters, run multiple SQL statements, and return values.
I'll explain how it works below, but to begin with, here is a high-level overview of the connector: 27 | 28 | ![Connector Overview](/Docs/Screenshots/connector-overview.png?raw=true "Connector Overview") 29 | 30 | And a more detailed sequence diagram to help explain the overall process: 31 | 32 | ![Connector Sequence Diagram](/Docs/Screenshots/connector-sequence-diagram.png?raw=true "Connector Sequence Diagram") 33 | 34 | Now let's dig a bit deeper into how this works. The following is a sample SQL script which this connector treats like a stored procedure: 35 | 36 | ```sql 37 | SELECT $FIRST_NAME AS FIRST_NAME, 'Doe' AS LAST_NAME, 1 AS AGE; 38 | 39 | SELECT CONCAT($FIRST_NAME, 'Bar') AS OUTPUT_1, $AGE + 100 AS OUTPUT_2; 40 | ``` 41 | 42 | The connector expects all input to be supplied via the `POST` body. Here is a sample `POST` body to execute the script above: 43 | 44 | ```json 45 | { 46 | "databaseName": "MyDatabase", 47 | "schemaName": "MySchema", 48 | "storedProcedureName": "MyStoredProcedure", 49 | "parameters": [ 50 | { 51 | "name": "FIRST_NAME", 52 | "type": "VARCHAR", 53 | "value": "Foo" 54 | }, 55 | { 56 | "name": "AGE", 57 | "type": "NUMBER", 58 | "value": "10" 59 | } 60 | ] 61 | } 62 | ``` 63 | 64 | And in this case the connector would return the following JSON object: 65 | 66 | ```json 67 | { 68 | "OUTPUT_1": "FooBar", 69 | "OUTPUT_2": "110" 70 | } 71 | ``` 72 | 73 | ### Parameters 74 | The connector leverages [Snowflake SQL session variables](https://docs.snowflake.net/manuals/sql-reference/session-variables.html) to pass values to the stored procedure script code. 75 | 76 | The connector expects all parameters to be supplied via the `parameters` JSON array in the `POST` body. Here is a snippet from the `POST` body in the [High Level Overview](#high-level-overview) section above which contains the parameter values: 77 | 78 | ```json 79 | { 80 | "parameters": [ 81 | { 82 | "name": "FIRST_NAME", 83 | "type": "VARCHAR", 84 | "value": "Foo" 85 | }, 86 | { 87 | "name": "AGE", 88 | "type": "NUMBER", 89 | "value": "10" 90 | } 91 | ] 92 | } 93 | ``` 94 | 95 | Basically it's an array of objects, with each object representing one parameter. Each parameter JSON object needs to contain the following three attributes: 96 | 97 | 1. name 98 | 1. type 99 | 1. value 100 | 101 | The following values for `type` are currently supported: 102 | 103 | * VARCHAR 104 | * NUMBER 105 | 106 | The connector generates a single `SET` query based on the parameters provided and executes it before running any of the queries in the SQL script. In this case the following `SET` query is generated: 107 | 108 | ```sql 109 | SET (FIRST_NAME,AGE) = ('Foo',10) 110 | ``` 111 | 112 | The SQL queries in the script then access the parameters as standard Snowflake session variables. Here is the second query from the [High Level Overview](#high-level-overview) above which uses each parameter/variable: 113 | 114 | ```sql 115 | SELECT CONCAT($FIRST_NAME, 'Bar') AS OUTPUT_1, $AGE + 100 AS OUTPUT_2; 116 | ``` 117 | 118 | For more advanced scenarios, you can also leverage the Snowflake `IDENTIFIER()` function to dynamically reference Snowflake objects like databases, schemas, tables, columns, stage names, etc. See [String Literals / Session Variables / Bind Variables as Identifiers](https://docs.snowflake.net/manuals/sql-reference/identifier-literal.html) for more details. 119 | 120 | ### Multiple Queries 121 | Executing multiple SQL statements in a stored procedure is critical to support non-trivial use cases.
As of September 2019, the Snowflake API does not support running multiple SQL statements in a single API call. Because of that, we need to manually break the script up into individual statements and run them sequentially. We also need to make sure that we run them in the same Snowflake session so that session-scoped variables (the parameters) can be used across queries. 122 | 123 | The connector uses the standard SQL semicolon to identify query boundaries. As such, a semicolon is required after each query. 124 | 125 | ### Return Values 126 | Returning values from a stored procedure is critical so that we can pass status and other elements (like row counts) to the caller. To enable return values the connector expects the final query in the script to return a result set with a single row with one or more columns. Here is the second query again from the [High Level Overview](#high-level-overview) above that does just that: 127 | 128 | ```sql 129 | SELECT CONCAT($FIRST_NAME, 'Bar') AS OUTPUT_1, $AGE + 100 AS OUTPUT_2; 130 | ``` 131 | 132 | The columns in the result are pivoted so that each column and value pair becomes one return value. The query above then returns two values, `OUTPUT_1` and `OUTPUT_2`. Here is the resulting JSON object which the connector returns: 133 | 134 | ```json 135 | { 136 | "OUTPUT_1": "FooBar", 137 | "OUTPUT_2": "110" 138 | } 139 | ``` 140 | 141 | *Note:* If more than one row is returned in the final result the connector will fail with the following error: "Property with the same name already exists on object." 142 | 143 | 144 | ### Script Storage 145 | All "stored procedure" scripts executed by this connector are stored in an Azure Blob Storage account. The container name is configurable but defaults to `storedprocedures`. Within the container all files are organized according to the following convention (all case sensitive): 146 | 147 | ``` 148 | /<databaseName> 149 | /<schemaName> 150 | /<storedProcedureName>.sql 151 | ``` 152 | 153 | The connector expects all three parameters to be supplied via the JSON `POST` body. Here is a snippet from the `POST` body in the [High Level Overview](#high-level-overview) section above which contains the required parameter values: 154 | 155 | ```json 156 | { 157 | "databaseName": "MyDatabase", 158 | "schemaName": "MySchema", 159 | "storedProcedureName": "MyStoredProcedure" 160 | } 161 | ``` 162 | 163 | The connector builds a blob storage path based off of those values and reads in the corresponding script file. In this case the full path is `/MyDatabase/MySchema/MyStoredProcedure.sql`. You can find the sample [MyStoredProcedure.sql](/Docs/SampleStoredProcedures/MyDatabase/MySchema/MyStoredProcedure.sql) script in the `/Docs` folder of this repo. 164 | 165 | ## Integration with ADF 166 | ### ADF Overview 167 | This project comes with a few sample ADF pipelines which demonstrate how to use this connector within ADF. The sample ADF resources are deployed during the [Deployment](#deployment) section below and are contained in the [SnowflakeConnectorAdfArmTemplate.json](/Docs/SnowflakeConnectorAdfArmTemplate.json) template in the `/Docs` folder of this repo. Here is a screenshot showing one of the sample pipelines: 168 | 169 | ![ADF Pipeline Overview](/Docs/Screenshots/adf-pipeline-overview.png?raw=true "ADF Pipeline Overview") 170 | 171 | As shown in the screenshot above, we use the native `Azure Function` Activity in ADF to interact with the Snowflake connector. The HTTP method is `POST` and the body contains the elements described above.
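
If you want to test or troubleshoot the connector outside of ADF, you can send it the same `POST` body directly over HTTP. Here is a minimal C# sketch of such a call; the Function App URL and function key shown are placeholders you would replace with the values from your own deployment:

```csharp
using System;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

class ConnectorSmokeTest
{
    static async Task Main()
    {
        // Placeholder endpoint: substitute your own Function App name and function key
        var functionUrl = "https://<your-function-app>.azurewebsites.net/api/SnowflakeConnectorAdf?code=<your-function-key>";

        // The same POST body shape shown in the High Level Overview section
        var body = @"{
            ""databaseName"": ""MyDatabase"",
            ""schemaName"": ""MySchema"",
            ""storedProcedureName"": ""MyStoredProcedure"",
            ""parameters"": [
                { ""name"": ""FIRST_NAME"", ""type"": ""VARCHAR"", ""value"": ""Foo"" },
                { ""name"": ""AGE"", ""type"": ""NUMBER"", ""value"": ""10"" }
            ]
        }";

        using var client = new HttpClient();
        var response = await client.PostAsync(functionUrl, new StringContent(body, Encoding.UTF8, "application/json"));

        // On success the connector returns the column/value pairs from the final query as a JSON object
        Console.WriteLine($"{(int)response.StatusCode}: {await response.Content.ReadAsStringAsync()}");
    }
}
```

This is essentially the same request the ADF `Azure Function` Activity issues on your behalf; ADF supplies the function key for you via the Azure Function linked service.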
172 | 173 | ### ADF Expressions 174 | [Expressions in ADF](https://docs.microsoft.com/en-us/azure/data-factory/control-flow-expression-language-functions) are very powerful and allow us to make the parameters passed to the connector very flexible. The screenshot above shows how to make use of ADF pipeline parameters when calling the connector. Here is the body of the HTTP request from the first ADF activity: 175 | 176 | ```json 177 | { 178 | "databaseName": "MyDatabase", 179 | "schemaName": "MySchema", 180 | "storedProcedureName": "MyStoredProcedure", 181 | "parameters": [ 182 | { 183 | "name": "FIRST_NAME", 184 | "type": "VARCHAR", 185 | "value": "@{pipeline().parameters.FIRST_NAME}" 186 | }, 187 | { 188 | "name": "AGE", 189 | "type": "NUMBER", 190 | "value": "@{pipeline().parameters.AGE}" 191 | } 192 | ] 193 | } 194 | ``` 195 | 196 | And in order to access return values from an ADF activity we make use of the `activity()` expression function. Here is the body of the HTTP request from the second ADF activity: 197 | 198 | ```json 199 | { 200 | "databaseName": "MyDatabase", 201 | "schemaName": "MySchema", 202 | "storedProcedureName": "MyStoredProcedure", 203 | "parameters": [ 204 | { 205 | "name": "FIRST_NAME", 206 | "type": "VARCHAR", 207 | "value": "@{activity('StoredProcedure1').output.OUTPUT_1}" 208 | }, 209 | { 210 | "name": "AGE", 211 | "type": "NUMBER", 212 | "value": "@{activity('StoredProcedure1').output.OUTPUT_2}" 213 | } 214 | ] 215 | } 216 | ``` 217 | 218 | ## Prerequisites 219 | In order to deploy the connector and associated Azure resources you must have the following: 220 | 221 | 1. A Snowflake account and Snowflake user with ACCOUNTADMIN role access 222 | 1. An Azure Subscription with at least Contributor access to a resource group 223 | 1. [.NET Core 3.1](https://dotnet.microsoft.com/download/dotnet-core/3.1) installed on your computer (choose the newest version) 224 | 1. [Visual Studio Code](https://code.visualstudio.com) installed on your computer with the following extensions installed 225 | 1. C# 226 | 1. Azure Functions 227 | 1. Azure Functions Core Tools installed on your computer (see the [Azure Functions Getting Started](https://code.visualstudio.com/tutorials/functions-extension/getting-started) guide for details) 228 | 1. [Azure Storage Explorer](https://azure.microsoft.com/en-us/features/storage-explorer/) installed on your computer 229 | 1. This entire repository downloaded (or cloned) to your computer 230 | 231 | ## Deployment 232 | Please complete the steps outlined in the [Prerequisites](#Prerequisites) section first and then do the following: 233 | 234 | 1. Log in to the [Azure Portal](https://portal.azure.com) 235 | 1. Create an Azure Resource Group to contain the required Azure resources (we'll be creating 5 resources total) 236 | 1. Look up your Azure Active Directory Object Id 237 | 1. Open the Azure Active Directory (AAD) pane by clicking on the "Azure Active Directory" link in the left navbar or by searching for "Azure Active Directory" in the top search bar 238 | 1. Click on the "Users" link in the AAD left navbar 239 | 1. Search the list of users by using your name or email, then open your user account 240 | 1. Copy the "Object ID" and save it for later 241 | 1. Deploy the Azure Resource Manager (ARM) template for the solution 242 | 1. Search for and open the "Deploy a custom template" service in the top search bar 243 | 1.
Click on "Build your own template in the editor" and then copy and paste the entire contents of the `\Docs\SnowflakeConnectorAdfArmTemplate.json` file in this repo and click "Save" 244 | 1. On the "Custom deployment" update the following fields 245 | 1. *Resource Group*: Select the Resource Group you created earlier 246 | 1. *Resource Name Prefix*: Pick a unique prefix which will be appended to all Azure resources created 247 | 1. *Key Vault Owner Object Id*: Paste the Active Directory Object Id you looked up earlier 248 | 1. *Snowflake Connection String*: Enter the Snowflake connection string to your Snowflake account in the following format: `account=;host=;user=ADF_DEMO_USER;password=` (see the [Snowflake Connector for .NET](https://github.com/snowflakedb/snowflake-connector-net) page for more details, and remember the password you pick here because you'll use it again when creating the Snowflake objects) 249 | 1. Check the "I agree to the terms and conditions stated above" and click "Purchase" (*Note*: this just means that you agree to pay for the Azure resources being created by the ARM template) 250 | 1. Wait for the ARM deployment to complete 251 | 1. Create a Shared Access Signature (SAS) for the Snowflake STAGE object 252 | 1. Open the Storage Account resource that was created for the connector 253 | 1. Click on "Shared access signature" in the left nav bar 254 | 1. Update the SAS access policy details as appropriate. Here is a suggested setup for the sample pipeline: 255 | 1. For "Allowed services" make sure only "Blob" is selected 256 | 1. For "Allowed resource types" make sure only "Container" and "Object" are selected 257 | 1. For "Allowed permissions" you'll only need "Read" and "List" to load data 258 | 1. For the "End" time pick a date in the future, maybe a month out (depending) 259 | 1. For "Allowed protocols" make sure that "HTTPS only" is selected 260 | 1. Click on "Generate SAS and connection string" 261 | 1. Copy the "SAS token" and save for the later 262 | 1. Please note that once you leave this page you can't get this value again, so save it now. 263 | 1. Deploy the required Snowflake objects for the sample pipelines 264 | 1. Login to your Snowflake account with a user that has ACCOUNTADMIN role access 265 | 1. Open the `\Docs\SnowflakeDbSetup.sql` script in Snowflake, or copy and paste the contents to a blank Worksheet in Snowflake 266 | 1. Follow the steps in the "Script setup" section to update a few values in the script 267 | 1. Run all queries in the Worksheet 268 | 1. Create a Key Vault Access Policy for the Function App 269 | 1. Open the Key Vault resource that was created 270 | 1. Click on "Access policies" in the Key Vault left nav bar 271 | 1. Click on the "+Add Access Policy" link 272 | 1. Under *Secret permissions* click on "Get" and "list" 273 | 1. Click on *Select principal* then *Select* the Function App you created (the name will be the *Resource Name Prefix* you select earlier + "fa") 274 | 1. Click on "Select" and the "Add" 275 | 1. Click on "Save" to save the new Access Policy (**important**) 276 | 1. Create a Key Vault Access Policy for the Azure Data Factory 277 | 1. Follow the steps above except this time for the *Select principal* step enter the name of the Azure Data Factory you created (the name will be the *Resource Name Prefix* you select earlier + "adf") 278 | 1. 
Update the `snowflakeConnectionString` Function App setting with the Key Vault secret version number (**note**: this is a temporary workaround until Azure Key Vault integration with Azure Functions is GA) 279 | 1. Open the Key Vault resource that was created 280 | 1. Click on the "Secrets" link in the Key Vault left nav bar and then click on the "snowflakeConnectionString" secret 281 | 1. Copy the "CURRENT VERSION" ID and save it for later 282 | 1. Open the Function App resource that was created 283 | 1. Click on the "Platform features" link and then on "Configuration" 284 | 1. Edit the "snowflakeConnectionString" setting and replace the `VERSION` string with the "CURRENT VERSION" ID from earlier 285 | 1. Click "Save" (**important**) 286 | 1. Update the `storageAccountConnectionString` Function App setting with the Key Vault secret version number (**note**: this is a temporary workaround until Azure Key Vault integration with Azure Functions is GA) 287 | 1. Follow the steps above except this time use the "storageAccountConnectionString" secret 288 | 1. Upload the sample stored procedure scripts to the new connector storage account 289 | 1. Open Azure Storage Explorer and find your new connector storage account 290 | 1. Open the "storedprocedures" blob container 291 | 1. Click on "Upload" and then "Upload Folder..." 292 | 1. Select the "ADF_DEMO" folder from the `\Docs\SampleStoredProcedures` folder and click "Upload" 293 | 1. Select the "MyDatabase" folder from the `\Docs\SampleStoredProcedures` folder and click "Upload" 294 | 1. Note: You can also upload these files via the [Azure Portal UI](https://portal.azure.com) but you'll need to manually create the appropriate folder structure and upload each file individually 295 | 1. Deploy the Azure Function code to the new Function App (from **Visual Studio Code**) 296 | 1. Open the solution in Visual Studio Code (VS Code) 297 | 1. Click on the "Azure" icon in the left nav bar 298 | 1. Click on the "Deploy to Function App..." (up arrow) icon in the Azure Function pane 299 | 1. Select your new Function App from the list and click "Deploy" 300 | 1. Run the sample Azure Data Factory (ADF) pipeline 301 | 1. Open the Azure Data Factory resource that was created 302 | 1. Click on the "Author & Monitor" icon 303 | 1. Click on the "Author" (pencil) icon in the left navbar 304 | 1. Click on the `SampleSnowflakePipeline_P` pipeline in the Factory Resources section 305 | 1. Click on the "Debug" link and then "Finish" to execute the pipeline 306 | 1. Do the same to execute the `DataIngestion_P` pipeline 307 | 308 | In order to debug/run the Azure Function locally you need to create a `local.settings.json` file and add the three environment variables expected by the function. Here is a template for the contents of the file (you'll need to replace all <> placeholders with real values for your environment): 309 | 310 | ```json 311 | { 312 | "IsEncrypted": false, 313 | "Values": { 314 | "AzureWebJobsStorage": "", 315 | "FUNCTIONS_WORKER_RUNTIME": "dotnet", 316 | "storageAccountConnectionString": "DefaultEndpointsProtocol=https;AccountName=<storage account name>;AccountKey=<storage account key>;EndpointSuffix=core.windows.net", 317 | "storageAccountContainerName": "storedprocedures", 318 | "snowflakeConnectionString": "account=<account name>;host=<host name>;user=ADF_DEMO_USER;password=<password>" 319 | } 320 | } 321 | ``` 322 | 323 | See the [Snowflake Connector for .NET](https://github.com/snowflakedb/snowflake-connector-net) page for important details around the Snowflake account name.
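
Once `local.settings.json` is in place and the function is running locally (for example via the VS Code debugger or `func start`), you can exercise it with the same request body shown earlier. Here is a minimal sketch, assuming the Azure Functions Core Tools default local address (function keys are not enforced when running locally):

```csharp
using System;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

class LocalConnectorTest
{
    static async Task Main()
    {
        // Default endpoint exposed by the local Azure Functions host
        var localUrl = "http://localhost:7071/api/SnowflakeConnectorAdf";

        // Same body shape as for the deployed connector; the parameters are needed because
        // the sample MyStoredProcedure.sql script references $FIRST_NAME and $AGE
        var body = @"{
            ""databaseName"": ""MyDatabase"",
            ""schemaName"": ""MySchema"",
            ""storedProcedureName"": ""MyStoredProcedure"",
            ""parameters"": [
                { ""name"": ""FIRST_NAME"", ""type"": ""VARCHAR"", ""value"": ""Foo"" },
                { ""name"": ""AGE"", ""type"": ""NUMBER"", ""value"": ""10"" }
            ]
        }";

        using var client = new HttpClient();
        var response = await client.PostAsync(localUrl, new StringContent(body, Encoding.UTF8, "application/json"));
        Console.WriteLine(await response.Content.ReadAsStringAsync());
    }
}
```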
324 | 325 | ## Legal 326 | Copyright (c) 2019 Snowflake Inc. 327 | 328 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this connector except in compliance with the License. You may obtain a copy of the License at: [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0) 329 | 330 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 331 | -------------------------------------------------------------------------------- /SnowflakeAdfConnector.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | netcoreapp3.1 4 | v3 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | PreserveNewest 14 | 15 | 16 | PreserveNewest 17 | Never 18 | 19 | 20 | -------------------------------------------------------------------------------- /SnowflakeConnectorAdf.cs: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2019 Snowflake Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at: 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on 10 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | See the License for the specific language governing permissions and limitations under the License. 
12 | */ 13 | using System; 14 | using System.IO; 15 | using System.Data; 16 | using System.Threading.Tasks; 17 | using System.Text.RegularExpressions; 18 | using Microsoft.AspNetCore.Mvc; 19 | using Microsoft.Azure.WebJobs; 20 | using Microsoft.Azure.WebJobs.Extensions.Http; 21 | using Microsoft.AspNetCore.Http; 22 | using Microsoft.Extensions.Logging; 23 | using Newtonsoft.Json; 24 | using Newtonsoft.Json.Linq; 25 | using Snowflake.Data.Client; 26 | using Microsoft.WindowsAzure.Storage; 27 | 28 | namespace Snowflake.Connector 29 | { 30 | public static class SnowflakeConnectorAdf 31 | { 32 | private static string _validBlobFolderNameRegex = @"^[A-Za-z0-9_-]+$"; 33 | // The parameter name corresponds to a restricted Snowflake unquoted identifier 34 | // https://docs.snowflake.net/manuals/sql-reference/identifiers-syntax.html 35 | private static string _validParameterNameRegex = @"^[A-Za-z_]{1}[A-Za-z0-9_-]*$"; 36 | // This is pretty restrictive 37 | private static string _validParameterTypeRegex = @"^VARCHAR|NUMBER$"; 38 | private static string _validParameterValueRegex = @"^[A-Za-z0-9./\\ :_-]+$"; 39 | 40 | [FunctionName("SnowflakeConnectorAdf")] 41 | public static async Task Run( 42 | [HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = null)] HttpRequest req, 43 | ILogger log) 44 | { 45 | log.LogInformation($"Started at: {DateTime.Now.ToString()} (UTC)."); 46 | 47 | // Read the POST body and convert to a JSON object 48 | string requestBodyString = await new StreamReader(req.Body).ReadToEndAsync(); 49 | dynamic requestBody = JsonConvert.DeserializeObject(requestBodyString, 50 | new JsonSerializerSettings() { DateParseHandling = DateParseHandling.None }); 51 | 52 | try 53 | { 54 | // Get the required inputs and validate them 55 | #region Collect and validate inputs 56 | string snowflakeConnectionString = System.Environment.GetEnvironmentVariable("snowflakeConnectionString"); 57 | if (String.IsNullOrEmpty(snowflakeConnectionString)) 58 | { 59 | throw new Exception("snowflakeConnectionString must be provided"); 60 | } 61 | string storageAccountConnectionString = System.Environment.GetEnvironmentVariable("storageAccountConnectionString"); 62 | if (String.IsNullOrEmpty(storageAccountConnectionString)) 63 | { 64 | throw new Exception("storageAccountConnectionString must be provided"); 65 | } 66 | string storageAccountContainerName = System.Environment.GetEnvironmentVariable("storageAccountContainerName"); 67 | if (String.IsNullOrEmpty(storageAccountContainerName)) 68 | { 69 | throw new Exception("storageAccountContainerName must be provided"); 70 | } 71 | string databaseName = Convert.ToString(requestBody.databaseName); 72 | if (String.IsNullOrEmpty(databaseName)) 73 | { 74 | throw new Exception("databaseName must be provided"); 75 | } 76 | string schemaName = Convert.ToString(requestBody.schemaName); 77 | if (String.IsNullOrEmpty(schemaName)) 78 | { 79 | throw new Exception("schemaName must be provided"); 80 | } 81 | string storedProcedureName = Convert.ToString(requestBody.storedProcedureName); 82 | if (String.IsNullOrEmpty(storedProcedureName)) 83 | { 84 | throw new Exception("storedProcedureName must be provided"); 85 | } 86 | #endregion Collect and validate inputs 87 | 88 | // Generate the blob file path to the stored procedure 89 | string storageAccountBlobFilePath = generateStoredProcedureBlobFilePath(databaseName, schemaName, storedProcedureName); 90 | 91 | // Get the SQL text to execute 92 | Task blobReadTask = readContentFromBlobAsync(log, storageAccountConnectionString, 
storageAccountContainerName, storageAccountBlobFilePath); 93 | string sqlText = blobReadTask.GetAwaiter().GetResult(); 94 | sqlText = sqlText.Trim(); 95 | if (sqlText.Length == 0) 96 | { 97 | throw new Exception($"Blob script {storageAccountBlobFilePath} is empty"); 98 | } 99 | 100 | // Split the SQL text into individual queries since we can only run one query at a time against Snowflake 101 | string[] sqlCommands = splitSqlCommands(sqlText); 102 | 103 | // Convert any parameters to SQL variables 104 | string setVariableCommand = ""; 105 | if (requestBody.ContainsKey("parameters")) 106 | { 107 | setVariableCommand = generateSetVariableCommand(requestBody.parameters); 108 | } 109 | 110 | // Run the Snowflake SQL commands 111 | var output = runSnowflakeSqlCommands(log, snowflakeConnectionString, setVariableCommand, sqlCommands); 112 | 113 | // Return a 200 OK result to the client with JSON body 114 | return new OkObjectResult(output); 115 | } 116 | catch (Exception e) 117 | { 118 | // Create a JSON error object 119 | var output = new JObject(); 120 | output.Add("ClassName", e.GetType().Name); 121 | output.Add("Message", e.Message); 122 | output.Add("StackTrace", e.StackTrace); 123 | 124 | // Return a 400 bad request result to the client with JSON body 125 | return new BadRequestObjectResult(output); 126 | } 127 | finally 128 | { 129 | log.LogInformation($"Completed at: {DateTime.Now.ToString()} (UTC)."); 130 | } 131 | } 132 | 133 | /// 134 | /// Generate a stored procedure blob file path 135 | /// 136 | /// The database that the stored procedure belongs to 137 | /// The schema that the stored procedure belongs to 138 | /// The stored procedure's name 139 | /// The blob file path to the stored procedure 140 | private static string generateStoredProcedureBlobFilePath(string databaseName, string schemaName, string storedProcedureName) 141 | { 142 | // Validate the blob path elements 143 | #region Validate inputs 144 | if (!Regex.IsMatch(databaseName, _validBlobFolderNameRegex)) 145 | { 146 | throw new Exception($"Found invalid databaseName value: {databaseName}"); 147 | } 148 | if (!Regex.IsMatch(schemaName, _validBlobFolderNameRegex)) 149 | { 150 | throw new Exception($"Found invalid schemaName value: {schemaName}"); 151 | } 152 | if (!Regex.IsMatch(storedProcedureName, _validBlobFolderNameRegex)) 153 | { 154 | throw new Exception($"Found invalid storedProcedureName value: {storedProcedureName}"); 155 | } 156 | #endregion Validate inputs 157 | 158 | return String.Format("{0}/{1}/{2}.sql", databaseName, schemaName, storedProcedureName); 159 | } 160 | 161 | /// 162 | /// Read the contents of a file from Azure Blob Storage 163 | /// 164 | /// ILogger object 165 | /// The storage account connection string 166 | /// The storage account container name 167 | /// The blob file path to the stored procedure 168 | /// The contents of the file as a string 169 | private static async Task readContentFromBlobAsync(ILogger log, string storageAccountConnectionString, string storageAccountContainerName, string storageAccountBlobFilePath) 170 | { 171 | log.LogInformation($"Getting content from blob file: {storageAccountBlobFilePath}"); 172 | 173 | var storageAccount = CloudStorageAccount.Parse(storageAccountConnectionString); 174 | var myClient = storageAccount.CreateCloudBlobClient(); 175 | var container = myClient.GetContainerReference(storageAccountContainerName); 176 | var blockBlob = container.GetBlockBlobReference(storageAccountBlobFilePath); 177 | return await blockBlob.DownloadTextAsync(); 178 | } 179 | 
180 | /// 181 | /// Generate the Snowflake SQL command to set all variables for the script 182 | /// 183 | /// JSON object containing the parameters 184 | /// The SQL command to set all variables 185 | private static string generateSetVariableCommand(dynamic parameters) 186 | { 187 | string snowflakeVariableNames = ""; 188 | string snowflakeVariableValues = ""; 189 | 190 | foreach (var param in parameters) 191 | { 192 | string parameterName = param.name.ToString(); 193 | string parameterType = param.type.ToString(); 194 | string parameterValue = param.value.ToString(); 195 | 196 | // Validate the parameter data 197 | #region Validate inputs 198 | if (!Regex.IsMatch(parameterName, _validParameterNameRegex)) 199 | { 200 | throw new Exception($"Found invalid parameter name: {parameterName}"); 201 | } 202 | if (!Regex.IsMatch(parameterType, _validParameterTypeRegex, RegexOptions.IgnoreCase)) 203 | { 204 | throw new Exception($"Found invalid parameter type for {parameterName}: {parameterType}"); 205 | } 206 | if (!Regex.IsMatch(parameterValue, _validParameterValueRegex)) 207 | { 208 | throw new Exception($"Found invalid parameter value for {parameterName}: {parameterValue}"); 209 | } 210 | #endregion Validate inputs 211 | 212 | // Add a new variable for this parameter 213 | snowflakeVariableNames += $"{parameterName},"; 214 | switch (parameterType.ToUpper()) 215 | { 216 | case "VARCHAR": 217 | snowflakeVariableValues += $"'{parameterValue}',"; 218 | break; 219 | case "NUMBER": 220 | snowflakeVariableValues += $"{parameterValue},"; 221 | break; 222 | default: 223 | throw new Exception($"Found invalid parameter type: {parameterType}"); 224 | } 225 | } 226 | 227 | // Remove the trailing comma from each string and return the final SQL command 228 | char[] charactersToRemove = {','}; 229 | snowflakeVariableNames = snowflakeVariableNames.TrimEnd(charactersToRemove); 230 | snowflakeVariableValues = snowflakeVariableValues.TrimEnd(charactersToRemove); 231 | return $"SET ({snowflakeVariableNames}) = ({snowflakeVariableValues})"; 232 | } 233 | 234 | /// 235 | /// Split the SQL text into individual commands 236 | /// 237 | /// The SQL text to be split 238 | /// An array of SQL commands 239 | private static string[] splitSqlCommands(string sqlText) 240 | { 241 | // Split sqlText on the query separator 242 | string[] sqlCommands = sqlText.Split(new[] { ";" }, StringSplitOptions.RemoveEmptyEntries); 243 | 244 | return sqlCommands; 245 | } 246 | 247 | /// 248 | /// Run SQL commands in Snowflake and return a JSON object with column/value pairs from first row of the result. 249 | /// See https://github.com/snowflakedb/snowflake-connector-net for more details. 
250 | /// 251 | /// ILogger object 252 | /// Snowflake connection string 253 | /// The SQL set variable command to execute 254 | /// The SQL commands to execute 255 | /// JSON object with column/value pairs from first row of the result 256 | private static JObject runSnowflakeSqlCommands(ILogger log, string snowflakeConnectionString, string setVariableCommand, string[] sqlCommands) 257 | { 258 | var output = new JObject(); 259 | log.LogInformation($"Found {sqlCommands.Length} queries to execute"); 260 | 261 | using (IDbConnection conn = new SnowflakeDbConnection()) 262 | { 263 | // Connect to Snowflake 264 | conn.ConnectionString = snowflakeConnectionString; 265 | conn.Open(); 266 | 267 | using (IDbCommand cmd = conn.CreateCommand()) 268 | { 269 | // First run the set variable command, if we have one 270 | if (!String.IsNullOrEmpty(setVariableCommand)) 271 | { 272 | cmd.CommandText = setVariableCommand; 273 | log.LogInformation($"Running SQL set variable command: {cmd.CommandText}"); 274 | cmd.ExecuteNonQuery(); 275 | } 276 | 277 | // Run every query except the last one using ExecuteNonQuery() 278 | for (int i = 0; i < sqlCommands.Length - 1; i++) 279 | { 280 | cmd.CommandText = sqlCommands[i].Trim(); 281 | log.LogInformation($"Running SQL command #{i+1}: {cmd.CommandText}"); 282 | cmd.ExecuteNonQuery(); 283 | } 284 | 285 | // Finally run the last query using ExecuteReader() so we can collect the output 286 | cmd.CommandText = sqlCommands[sqlCommands.Length - 1].Trim(); 287 | log.LogInformation($"Running SQL command #{sqlCommands.Length} (final): {cmd.CommandText}"); 288 | IDataReader reader = cmd.ExecuteReader(); 289 | 290 | // The final result should be a table with one row and n columns, format the column/value pairs in JSON. 291 | // Warning: If more than one row is returned in the final result this will return the following error: 292 | // "Property with the same name already exists on object." 293 | while (reader.Read()) 294 | { 295 | for (int i = 0; i < reader.FieldCount; i++) 296 | { 297 | var columnName = reader.GetName(i); 298 | var value = reader[i].ToString(); 299 | output.Add(columnName, value); 300 | } 301 | } 302 | } 303 | 304 | conn.Close(); 305 | } 306 | 307 | return output; 308 | } 309 | } 310 | } 311 | -------------------------------------------------------------------------------- /host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0" 3 | } --------------------------------------------------------------------------------