├── .gitattributes ├── .gitignore ├── .markdownlint.jsonc ├── AzureFunctions-BatchedFileValidation.sln ├── AzureFunctions.Python ├── .funcignore ├── .gitignore ├── .pylintrc ├── .vscode │ ├── extensions.json │ ├── launch.json │ ├── settings.json │ └── tasks.json ├── README.md ├── common │ ├── __init__.py │ ├── batch.py │ └── blobstorageclient.py ├── host.json ├── local.settings.sample.json ├── process_batches │ ├── __init__.py │ └── function.json ├── proxies.json ├── requirements.txt └── validate_batch │ ├── __init__.py │ ├── batchvalidation.py │ └── function.json ├── AzureFunctions.v1.Durable ├── AzureFunctions.v1.Durable.csproj ├── Properties │ └── launchSettings.json └── host.json ├── AzureFunctions.v1 ├── AzureFunctions.v1.csproj └── host.json ├── AzureFunctions.v3.Durable ├── AzureFunctions.v3.Durable.csproj ├── DurableContextExtensions.cs ├── FunctionEnsureAllFiles.cs ├── FunctionValidateFileSet.cs ├── Orchestrator.cs └── host.json ├── AzureFunctions.v3.DurableEntities ├── AzureFunctions.v3.DurableEntities.csproj ├── BatchEntity.cs ├── Orchestrator.cs ├── README.md └── host.json ├── AzureFunctions.v3 ├── AzureFunctions.v3.csproj ├── CustomerBlobAttributes.cs ├── Extensions.cs ├── FunctionEnsureAllFiles.cs ├── FunctionValidateFileSet.cs ├── Helpers.cs ├── LockTableEntity.cs ├── host.json └── sample.local.settings.json ├── CODE_OF_CONDUCT.md ├── LICENSE ├── LogicApps ├── DeploymentHelper.cs ├── deploy.ps1 ├── deploy.sh ├── deployer.rb ├── parameters.json └── template.json ├── README.md ├── SECURITY.md ├── images └── ngroksubscription.png └── sampledata ├── cust1_20171010_1112_type1.csv ├── cust1_20171010_1112_type10.csv ├── cust1_20171010_1112_type2.csv ├── cust1_20171010_1112_type3.csv ├── cust1_20171010_1112_type4.csv ├── cust1_20171010_1112_type5.csv ├── cust1_20171010_1112_type7.csv ├── cust1_20171010_1112_type8.csv ├── cust1_20171010_1112_type9.csv ├── cust1_20171011_1112_type1.csv ├── cust1_20171011_1112_type10.csv ├── cust1_20171011_1112_type2.csv ├── cust1_20171011_1112_type3.csv ├── cust1_20171011_1112_type4.csv ├── cust1_20171011_1112_type5.csv ├── cust1_20171011_1112_type7.csv ├── cust1_20171011_1112_type8.csv └── cust1_20171011_1112_type9.csv /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. 
To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # Azure Functions localsettings file 7 | local.settings.json 8 | 9 | # User-specific files 10 | *.rsuser 11 | *.suo 12 | *.user 13 | *.userosscache 14 | *.sln.docstates 15 | 16 | # User-specific files (MonoDevelop/Xamarin Studio) 17 | *.userprefs 18 | 19 | # Mono auto generated files 20 | mono_crash.* 21 | 22 | # Build results 23 | [Dd]ebug/ 24 | [Dd]ebugPublic/ 25 | [Rr]elease/ 26 | [Rr]eleases/ 27 | x64/ 28 | x86/ 29 | [Aa][Rr][Mm]/ 30 | [Aa][Rr][Mm]64/ 31 | bld/ 32 | [Bb]in/ 33 | [Oo]bj/ 34 | [Ll]og/ 35 | [Ll]ogs/ 36 | 37 | # Visual Studio 2015/2017 cache/options directory 38 | .vs/ 39 | # Uncomment if you have tasks that create the project's static files in wwwroot 40 | #wwwroot/ 41 | 42 | # Visual Studio 2017 auto generated files 43 | Generated\ Files/ 44 | 45 | # MSTest test Results 46 | [Tt]est[Rr]esult*/ 47 | [Bb]uild[Ll]og.* 48 | 49 | # NUnit 50 | *.VisualState.xml 51 | TestResult.xml 52 | nunit-*.xml 53 | 54 | # Build Results of an ATL Project 55 | [Dd]ebugPS/ 56 | [Rr]eleasePS/ 57 | dlldata.c 58 | 59 | # Benchmark Results 60 | BenchmarkDotNet.Artifacts/ 61 | 62 | # .NET Core 63 | project.lock.json 64 | project.fragment.lock.json 65 | artifacts/ 66 | 67 | # StyleCop 68 | StyleCopReport.xml 69 | 70 | # Files built by Visual Studio 71 | *_i.c 72 | *_p.c 73 | *_h.h 74 | *.ilk 75 | *.meta 76 | *.obj 77 | *.iobj 78 | *.pch 79 | *.pdb 80 | *.ipdb 81 | *.pgc 82 | *.pgd 83 | *.rsp 84 | *.sbr 85 | *.tlb 86 | *.tli 87 | *.tlh 88 | *.tmp 89 | *.tmp_proj 90 | *_wpftmp.csproj 91 | *.log 92 | *.vspscc 93 | *.vssscc 94 | .builds 95 | *.pidb 96 | *.svclog 97 | *.scc 98 | 99 | # Chutzpah Test files 100 | _Chutzpah* 101 | 102 | # Visual C++ cache files 103 | ipch/ 104 | *.aps 105 | *.ncb 106 | *.opendb 107 | *.opensdf 108 | *.sdf 109 | *.cachefile 110 | *.VC.db 111 | *.VC.VC.opendb 112 | 113 | # Visual Studio profiler 114 | *.psess 115 | *.vsp 116 | *.vspx 117 | *.sap 118 | 119 | # Visual Studio Trace Files 120 | *.e2e 121 | 122 | # TFS 2012 Local Workspace 123 | $tf/ 124 | 125 | # Guidance Automation Toolkit 126 | *.gpState 127 | 128 | # ReSharper is a .NET coding add-in 129 | _ReSharper*/ 130 | *.[Rr]e[Ss]harper 131 | *.DotSettings.user 132 | 133 | # JustCode is a .NET coding add-in 134 | .JustCode 135 | 136 | # TeamCity is a build add-in 137 | _TeamCity* 138 | 139 | # DotCover is a Code Coverage Tool 140 | *.dotCover 141 | 142 | # AxoCover is a Code Coverage Tool 143 | .axoCover/* 144 | !.axoCover/settings.json 145 | 146 | # Visual Studio code coverage results 147 | *.coverage 148 | *.coveragexml 149 | 150 | # NCrunch 151 | _NCrunch_* 152 | .*crunch*.local.xml 153 | nCrunchTemp_* 154 | 155 | # MightyMoose 156 | *.mm.* 157 | AutoTest.Net/ 158 | 159 | # Web workbench (sass) 160 | .sass-cache/ 161 | 162 | # Installshield output folder 163 | [Ee]xpress/ 164 | 165 | # DocProject is a documentation generator add-in 166 | DocProject/buildhelp/ 167 | DocProject/Help/*.HxT 168 | DocProject/Help/*.HxC 169 | DocProject/Help/*.hhc 170 | DocProject/Help/*.hhk 171 | DocProject/Help/*.hhp 172 | DocProject/Help/Html2 173 | DocProject/Help/html 174 | 175 | # Click-Once directory 176 | publish/ 177 | 178 | # Publish Web Output 179 | *.[Pp]ublish.xml 180 | *.azurePubxml 181 | # Note: Comment the next line if you want to checkin your web deploy settings, 182 | # but database connection strings (with potential passwords) will be unencrypted 183 | #*.pubxml 184 | *.publishproj 185 | 186 | # Microsoft Azure 
Web App publish settings. Comment the next line if you want to 187 | # checkin your Azure Web App publish settings, but sensitive information contained 188 | # in these scripts will be unencrypted 189 | PublishScripts/ 190 | 191 | # NuGet Packages 192 | *.nupkg 193 | # NuGet Symbol Packages 194 | *.snupkg 195 | # The packages folder can be ignored because of Package Restore 196 | **/[Pp]ackages/* 197 | # except build/, which is used as an MSBuild target. 198 | !**/[Pp]ackages/build/ 199 | # Uncomment if necessary however generally it will be regenerated when needed 200 | #!**/[Pp]ackages/repositories.config 201 | # NuGet v3's project.json files produces more ignorable files 202 | *.nuget.props 203 | *.nuget.targets 204 | 205 | # Microsoft Azure Build Output 206 | csx/ 207 | *.build.csdef 208 | 209 | # Microsoft Azure Emulator 210 | ecf/ 211 | rcf/ 212 | 213 | # Windows Store app package directories and files 214 | AppPackages/ 215 | BundleArtifacts/ 216 | Package.StoreAssociation.xml 217 | _pkginfo.txt 218 | *.appx 219 | *.appxbundle 220 | *.appxupload 221 | 222 | # Visual Studio cache files 223 | # files ending in .cache can be ignored 224 | *.[Cc]ache 225 | # but keep track of directories ending in .cache 226 | !?*.[Cc]ache/ 227 | 228 | # Others 229 | ClientBin/ 230 | ~$* 231 | *~ 232 | *.dbmdl 233 | *.dbproj.schemaview 234 | *.jfm 235 | *.pfx 236 | *.publishsettings 237 | orleans.codegen.cs 238 | 239 | # Including strong name files can present a security risk 240 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 241 | #*.snk 242 | 243 | # Since there are multiple workflows, uncomment next line to ignore bower_components 244 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 245 | #bower_components/ 246 | 247 | # RIA/Silverlight projects 248 | Generated_Code/ 249 | 250 | # Backup & report files from converting an old project file 251 | # to a newer Visual Studio version. Backup files are not needed, 252 | # because we have git ;-) 253 | _UpgradeReport_Files/ 254 | Backup*/ 255 | UpgradeLog*.XML 256 | UpgradeLog*.htm 257 | ServiceFabricBackup/ 258 | *.rptproj.bak 259 | 260 | # SQL Server files 261 | *.mdf 262 | *.ldf 263 | *.ndf 264 | 265 | # Business Intelligence projects 266 | *.rdl.data 267 | *.bim.layout 268 | *.bim_*.settings 269 | *.rptproj.rsuser 270 | *- [Bb]ackup.rdl 271 | *- [Bb]ackup ([0-9]).rdl 272 | *- [Bb]ackup ([0-9][0-9]).rdl 273 | 274 | # Microsoft Fakes 275 | FakesAssemblies/ 276 | 277 | # GhostDoc plugin setting file 278 | *.GhostDoc.xml 279 | 280 | # Node.js Tools for Visual Studio 281 | .ntvs_analysis.dat 282 | node_modules/ 283 | 284 | # Visual Studio 6 build log 285 | *.plg 286 | 287 | # Visual Studio 6 workspace options file 288 | *.opt 289 | 290 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
291 | *.vbw 292 | 293 | # Visual Studio LightSwitch build output 294 | **/*.HTMLClient/GeneratedArtifacts 295 | **/*.DesktopClient/GeneratedArtifacts 296 | **/*.DesktopClient/ModelManifest.xml 297 | **/*.Server/GeneratedArtifacts 298 | **/*.Server/ModelManifest.xml 299 | _Pvt_Extensions 300 | 301 | # Paket dependency manager 302 | .paket/paket.exe 303 | paket-files/ 304 | 305 | # FAKE - F# Make 306 | .fake/ 307 | 308 | # CodeRush personal settings 309 | .cr/personal 310 | 311 | # Python Tools for Visual Studio (PTVS) 312 | __pycache__/ 313 | *.pyc 314 | 315 | # Cake - Uncomment if you are using it 316 | # tools/** 317 | # !tools/packages.config 318 | 319 | # Tabs Studio 320 | *.tss 321 | 322 | # Telerik's JustMock configuration file 323 | *.jmconfig 324 | 325 | # BizTalk build output 326 | *.btp.cs 327 | *.btm.cs 328 | *.odx.cs 329 | *.xsd.cs 330 | 331 | # OpenCover UI analysis results 332 | OpenCover/ 333 | 334 | # Azure Stream Analytics local run output 335 | ASALocalRun/ 336 | 337 | # MSBuild Binary and Structured Log 338 | *.binlog 339 | 340 | # NVidia Nsight GPU debugger configuration file 341 | *.nvuser 342 | 343 | # MFractors (Xamarin productivity tool) working folder 344 | .mfractor/ 345 | 346 | # Local History for Visual Studio 347 | .localhistory/ 348 | 349 | # BeatPulse healthcheck temp database 350 | healthchecksdb 351 | 352 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 353 | MigrationBackup/ 354 | 355 | # Ionide (cross platform F# VS Code tools) working folder 356 | .ionide/ 357 | -------------------------------------------------------------------------------- /.markdownlint.jsonc: -------------------------------------------------------------------------------- 1 | { 2 | "default":true, 3 | "no-inline-html": false, 4 | "line-length": false 5 | } -------------------------------------------------------------------------------- /AzureFunctions-BatchedFileValidation.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.29411.108 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FunctionsV1", "FunctionsV1", "{F8CE7BED-6111-476F-A5AF-2562039E2091}" 7 | EndProject 8 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FunctionsV3", "FunctionsV3", "{CFFE2BAD-8736-42FA-89FE-7774BF32588A}" 9 | EndProject 10 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v1", "AzureFunctions.v1\AzureFunctions.v1.csproj", "{5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}" 11 | EndProject 12 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v1.Durable", "AzureFunctions.v1.Durable\AzureFunctions.v1.Durable.csproj", "{D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}" 13 | EndProject 14 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v3", "AzureFunctions.v3\AzureFunctions.v3.csproj", "{6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}" 15 | EndProject 16 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v3.Durable", "AzureFunctions.v3.Durable\AzureFunctions.v3.Durable.csproj", "{57193360-9F20-40C5-BD96-61E8724BDA1C}" 17 | EndProject 18 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{4708BF93-EC9C-4DE7-BC4D-99D89D81D744}" 19 | ProjectSection(SolutionItems) = preProject 20 | README.md = README.md 21 | EndProjectSection 22 | EndProject 23 | 
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v3.DurableEntities", "AzureFunctions.v3.DurableEntities\AzureFunctions.v3.DurableEntities.csproj", "{0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}" 24 | EndProject 25 | Global 26 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 27 | Debug|Any CPU = Debug|Any CPU 28 | Release|Any CPU = Release|Any CPU 29 | EndGlobalSection 30 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 31 | {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 32 | {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}.Debug|Any CPU.Build.0 = Debug|Any CPU 33 | {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}.Release|Any CPU.ActiveCfg = Release|Any CPU 34 | {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}.Release|Any CPU.Build.0 = Release|Any CPU 35 | {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 36 | {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}.Debug|Any CPU.Build.0 = Debug|Any CPU 37 | {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}.Release|Any CPU.ActiveCfg = Release|Any CPU 38 | {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}.Release|Any CPU.Build.0 = Release|Any CPU 39 | {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 40 | {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}.Debug|Any CPU.Build.0 = Debug|Any CPU 41 | {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}.Release|Any CPU.ActiveCfg = Release|Any CPU 42 | {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}.Release|Any CPU.Build.0 = Release|Any CPU 43 | {57193360-9F20-40C5-BD96-61E8724BDA1C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 44 | {57193360-9F20-40C5-BD96-61E8724BDA1C}.Debug|Any CPU.Build.0 = Debug|Any CPU 45 | {57193360-9F20-40C5-BD96-61E8724BDA1C}.Release|Any CPU.ActiveCfg = Release|Any CPU 46 | {57193360-9F20-40C5-BD96-61E8724BDA1C}.Release|Any CPU.Build.0 = Release|Any CPU 47 | {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 48 | {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}.Debug|Any CPU.Build.0 = Debug|Any CPU 49 | {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}.Release|Any CPU.ActiveCfg = Release|Any CPU 50 | {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}.Release|Any CPU.Build.0 = Release|Any CPU 51 | EndGlobalSection 52 | GlobalSection(SolutionProperties) = preSolution 53 | HideSolutionNode = FALSE 54 | EndGlobalSection 55 | GlobalSection(NestedProjects) = preSolution 56 | {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57} = {F8CE7BED-6111-476F-A5AF-2562039E2091} 57 | {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A} = {F8CE7BED-6111-476F-A5AF-2562039E2091} 58 | {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B} = {CFFE2BAD-8736-42FA-89FE-7774BF32588A} 59 | {57193360-9F20-40C5-BD96-61E8724BDA1C} = {CFFE2BAD-8736-42FA-89FE-7774BF32588A} 60 | {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201} = {CFFE2BAD-8736-42FA-89FE-7774BF32588A} 61 | EndGlobalSection 62 | GlobalSection(ExtensibilityGlobals) = postSolution 63 | SolutionGuid = {65B203C2-3EEE-4F9C-B4EB-AB8B67A71F67} 64 | EndGlobalSection 65 | EndGlobal 66 | -------------------------------------------------------------------------------- /AzureFunctions.Python/.funcignore: -------------------------------------------------------------------------------- 1 | .git* 2 | .vscode 3 | local.settings.json 4 | test 5 | .venv -------------------------------------------------------------------------------- /AzureFunctions.Python/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / 
packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # pipenv 86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 88 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not 89 | # install all needed dependencies. 90 | #Pipfile.lock 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # SageMath parsed files 96 | *.sage.py 97 | 98 | # Environments 99 | .env 100 | .venv 101 | env/ 102 | venv/ 103 | ENV/ 104 | env.bak/ 105 | venv.bak/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ 124 | 125 | # Azure Functions artifacts 126 | bin 127 | obj 128 | appsettings.json 129 | local.settings.json 130 | .python_packages -------------------------------------------------------------------------------- /AzureFunctions.Python/.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 6 | extension-pkg-whitelist= 7 | 8 | # Specify a score threshold to be exceeded before program exits with error. 9 | fail-under=10 10 | 11 | # Add files or directories to the blacklist. They should be base names, not 12 | # paths. 13 | ignore=CVS 14 | 15 | # Add files or directories matching the regex patterns to the blacklist. The 16 | # regex matches against base names, not paths. 17 | ignore-patterns= 18 | 19 | # Python code to execute, usually for sys.path manipulation such as 20 | # pygtk.require(). 21 | #init-hook= 22 | 23 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 24 | # number of processors available to use. 25 | jobs=1 26 | 27 | # Control the amount of potential inferred values when inferring a single 28 | # object. 
This can help the performance when dealing with large functions or 29 | # complex, nested conditions. 30 | limit-inference-results=100 31 | 32 | # List of plugins (as comma separated values of python module names) to load, 33 | # usually to register additional checkers. 34 | load-plugins= 35 | 36 | # Pickle collected data for later comparisons. 37 | persistent=yes 38 | 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit 40 | # user-friendly hints instead of false-positive error messages. 41 | suggestion-mode=yes 42 | 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the 44 | # active Python interpreter and may run arbitrary code. 45 | unsafe-load-any-extension=no 46 | 47 | 48 | [MESSAGES CONTROL] 49 | 50 | # Only show warnings with the listed confidence levels. Leave empty to show 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 52 | confidence= 53 | 54 | # Disable the message, report, category or checker with the given id(s). You 55 | # can either give multiple identifiers separated by comma (,) or put this 56 | # option multiple times (only on the command line, not in the configuration 57 | # file where it should appear only once). You can also use "--disable=all" to 58 | # disable everything first and then reenable specific checks. For example, if 59 | # you want to run only the similarities checker, you can use "--disable=all 60 | # --enable=similarities". If you want to run only the classes checker, but have 61 | # no Warning level messages displayed, use "--disable=all --enable=classes 62 | # --disable=W". 63 | disable=print-statement, 64 | parameter-unpacking, 65 | unpacking-in-except, 66 | old-raise-syntax, 67 | backtick, 68 | long-suffix, 69 | old-ne-operator, 70 | old-octal-literal, 71 | import-star-module-level, 72 | non-ascii-bytes-literal, 73 | raw-checker-failed, 74 | bad-inline-option, 75 | locally-disabled, 76 | file-ignored, 77 | suppressed-message, 78 | useless-suppression, 79 | deprecated-pragma, 80 | use-symbolic-message-instead, 81 | apply-builtin, 82 | basestring-builtin, 83 | buffer-builtin, 84 | cmp-builtin, 85 | coerce-builtin, 86 | execfile-builtin, 87 | file-builtin, 88 | long-builtin, 89 | raw_input-builtin, 90 | reduce-builtin, 91 | standarderror-builtin, 92 | unicode-builtin, 93 | xrange-builtin, 94 | coerce-method, 95 | delslice-method, 96 | getslice-method, 97 | setslice-method, 98 | no-absolute-import, 99 | old-division, 100 | dict-iter-method, 101 | dict-view-method, 102 | next-method-called, 103 | metaclass-assignment, 104 | indexing-exception, 105 | raising-string, 106 | reload-builtin, 107 | oct-method, 108 | hex-method, 109 | nonzero-method, 110 | cmp-method, 111 | input-builtin, 112 | round-builtin, 113 | intern-builtin, 114 | unichr-builtin, 115 | map-builtin-not-iterating, 116 | zip-builtin-not-iterating, 117 | range-builtin-not-iterating, 118 | filter-builtin-not-iterating, 119 | using-cmp-argument, 120 | eq-without-hash, 121 | div-method, 122 | idiv-method, 123 | rdiv-method, 124 | exception-message-attribute, 125 | invalid-str-codec, 126 | sys-max-int, 127 | bad-python3-import, 128 | deprecated-string-function, 129 | deprecated-str-translate-call, 130 | deprecated-itertools-function, 131 | deprecated-types-field, 132 | next-method-defined, 133 | dict-items-not-iterating, 134 | dict-keys-not-iterating, 135 | dict-values-not-iterating, 136 | deprecated-operator-function, 137 | deprecated-urllib-function, 138 | xreadlines-attribute, 139 | deprecated-sys-function, 140 
| exception-escape, 141 | comprehension-escape, 142 | broad-except, 143 | relative-beyond-top-level 144 | 145 | # Enable the message, report, category or checker with the given id(s). You can 146 | # either give multiple identifier separated by comma (,) or put this option 147 | # multiple time (only on the command line, not in the configuration file where 148 | # it should appear only once). See also the "--disable" option for examples. 149 | enable=c-extension-no-member 150 | 151 | 152 | [REPORTS] 153 | 154 | # Python expression which should return a score less than or equal to 10. You 155 | # have access to the variables 'error', 'warning', 'refactor', and 'convention' 156 | # which contain the number of messages in each category, as well as 'statement' 157 | # which is the total number of statements analyzed. This score is used by the 158 | # global evaluation report (RP0004). 159 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 160 | 161 | # Template used to display messages. This is a python new-style format string 162 | # used to format the message information. See doc for all details. 163 | #msg-template= 164 | 165 | # Set the output format. Available formats are text, parseable, colorized, json 166 | # and msvs (visual studio). You can also give a reporter class, e.g. 167 | # mypackage.mymodule.MyReporterClass. 168 | output-format=text 169 | 170 | # Tells whether to display a full report or only the messages. 171 | reports=no 172 | 173 | # Activate the evaluation score. 174 | score=yes 175 | 176 | 177 | [REFACTORING] 178 | 179 | # Maximum number of nested blocks for function / method body 180 | max-nested-blocks=5 181 | 182 | # Complete name of functions that never returns. When checking for 183 | # inconsistent-return-statements if a never returning function is called then 184 | # it will be considered as an explicit return statement and no message will be 185 | # printed. 186 | never-returning-functions=sys.exit 187 | 188 | 189 | [BASIC] 190 | 191 | # Naming style matching correct argument names. 192 | argument-naming-style=snake_case 193 | 194 | # Regular expression matching correct argument names. Overrides argument- 195 | # naming-style. 196 | #argument-rgx= 197 | 198 | # Naming style matching correct attribute names. 199 | attr-naming-style=snake_case 200 | 201 | # Regular expression matching correct attribute names. Overrides attr-naming- 202 | # style. 203 | #attr-rgx= 204 | 205 | # Bad variable names which should always be refused, separated by a comma. 206 | bad-names=foo, 207 | bar, 208 | baz, 209 | toto, 210 | tutu, 211 | tata 212 | 213 | # Bad variable names regexes, separated by a comma. If names match any regex, 214 | # they will always be refused 215 | bad-names-rgxs= 216 | 217 | # Naming style matching correct class attribute names. 218 | class-attribute-naming-style=any 219 | 220 | # Regular expression matching correct class attribute names. Overrides class- 221 | # attribute-naming-style. 222 | #class-attribute-rgx= 223 | 224 | # Naming style matching correct class names. 225 | class-naming-style=PascalCase 226 | 227 | # Regular expression matching correct class names. Overrides class-naming- 228 | # style. 229 | #class-rgx= 230 | 231 | # Naming style matching correct constant names. 232 | const-naming-style=UPPER_CASE 233 | 234 | # Regular expression matching correct constant names. Overrides const-naming- 235 | # style. 
236 | #const-rgx= 237 | 238 | # Minimum line length for functions/classes that require docstrings, shorter 239 | # ones are exempt. 240 | docstring-min-length=-1 241 | 242 | # Naming style matching correct function names. 243 | function-naming-style=snake_case 244 | 245 | # Regular expression matching correct function names. Overrides function- 246 | # naming-style. 247 | #function-rgx= 248 | 249 | # Good variable names which should always be accepted, separated by a comma. 250 | good-names=i, 251 | j, 252 | k, 253 | ex, 254 | Run, 255 | _ 256 | 257 | # Good variable names regexes, separated by a comma. If names match any regex, 258 | # they will always be accepted 259 | good-names-rgxs= 260 | 261 | # Include a hint for the correct naming format with invalid-name. 262 | include-naming-hint=no 263 | 264 | # Naming style matching correct inline iteration names. 265 | inlinevar-naming-style=any 266 | 267 | # Regular expression matching correct inline iteration names. Overrides 268 | # inlinevar-naming-style. 269 | #inlinevar-rgx= 270 | 271 | # Naming style matching correct method names. 272 | method-naming-style=snake_case 273 | 274 | # Regular expression matching correct method names. Overrides method-naming- 275 | # style. 276 | #method-rgx= 277 | 278 | # Naming style matching correct module names. 279 | module-naming-style=snake_case 280 | 281 | # Regular expression matching correct module names. Overrides module-naming- 282 | # style. 283 | #module-rgx= 284 | 285 | # Colon-delimited sets of names that determine each other's naming style when 286 | # the name regexes allow several styles. 287 | name-group= 288 | 289 | # Regular expression which should only match function or class names that do 290 | # not require a docstring. 291 | no-docstring-rgx=^_ 292 | 293 | # List of decorators that produce properties, such as abc.abstractproperty. Add 294 | # to this list to register other decorators that produce valid properties. 295 | # These decorators are taken in consideration only for invalid-name. 296 | property-classes=abc.abstractproperty 297 | 298 | # Naming style matching correct variable names. 299 | variable-naming-style=snake_case 300 | 301 | # Regular expression matching correct variable names. Overrides variable- 302 | # naming-style. 303 | #variable-rgx= 304 | 305 | 306 | [FORMAT] 307 | 308 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 309 | expected-line-ending-format= 310 | 311 | # Regexp for a line that is allowed to be longer than the limit. 312 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$ 313 | 314 | # Number of spaces of indent required inside a hanging or continued line. 315 | indent-after-paren=4 316 | 317 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 318 | # tab). 319 | indent-string=' ' 320 | 321 | # Maximum number of characters on a single line. 322 | max-line-length=100 323 | 324 | # Maximum number of lines in a module. 325 | max-module-lines=1000 326 | 327 | # List of optional constructs for which whitespace checking is disabled. `dict- 328 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 329 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 330 | # `empty-line` allows space-only lines. 331 | no-space-check=trailing-comma, 332 | dict-separator 333 | 334 | # Allow the body of a class to be on the same line as the declaration if body 335 | # contains single statement.
336 | single-line-class-stmt=no 337 | 338 | # Allow the body of an if to be on the same line as the test if there is no 339 | # else. 340 | single-line-if-stmt=no 341 | 342 | 343 | [LOGGING] 344 | 345 | # The type of string formatting that logging methods do. `old` means using % 346 | # formatting, `new` is for `{}` formatting. 347 | logging-format-style=old 348 | 349 | # Logging modules to check that the string format arguments are in logging 350 | # function parameter format. 351 | logging-modules=logging 352 | 353 | 354 | [MISCELLANEOUS] 355 | 356 | # List of note tags to take in consideration, separated by a comma. 357 | notes=FIXME, 358 | XXX, 359 | TODO 360 | 361 | # Regular expression of note tags to take in consideration. 362 | #notes-rgx= 363 | 364 | 365 | [SIMILARITIES] 366 | 367 | # Ignore comments when computing similarities. 368 | ignore-comments=yes 369 | 370 | # Ignore docstrings when computing similarities. 371 | ignore-docstrings=yes 372 | 373 | # Ignore imports when computing similarities. 374 | ignore-imports=no 375 | 376 | # Minimum lines number of a similarity. 377 | min-similarity-lines=4 378 | 379 | 380 | [SPELLING] 381 | 382 | # Limits count of emitted suggestions for spelling mistakes. 383 | max-spelling-suggestions=4 384 | 385 | # Spelling dictionary name. Available dictionaries: none. To make it work, 386 | # install the python-enchant package. 387 | spelling-dict= 388 | 389 | # List of comma separated words that should not be checked. 390 | spelling-ignore-words= 391 | 392 | # A path to a file that contains the private dictionary; one word per line. 393 | spelling-private-dict-file= 394 | 395 | # Tells whether to store unknown words to the private dictionary (see the 396 | # --spelling-private-dict-file option) instead of raising a message. 397 | spelling-store-unknown-words=no 398 | 399 | 400 | [STRING] 401 | 402 | # This flag controls whether inconsistent-quotes generates a warning when the 403 | # character used as a quote delimiter is used inconsistently within a module. 404 | check-quote-consistency=no 405 | 406 | # This flag controls whether the implicit-str-concat should generate a warning 407 | # on implicit string concatenation in sequences defined over several lines. 408 | check-str-concat-over-line-jumps=no 409 | 410 | 411 | [TYPECHECK] 412 | 413 | # List of decorators that produce context managers, such as 414 | # contextlib.contextmanager. Add to this list to register other decorators that 415 | # produce valid context managers. 416 | contextmanager-decorators=contextlib.contextmanager 417 | 418 | # List of members which are set dynamically and missed by pylint inference 419 | # system, and so shouldn't trigger E1101 when accessed. Python regular 420 | # expressions are accepted. 421 | generated-members= 422 | 423 | # Tells whether missing members accessed in mixin class should be ignored. A 424 | # mixin class is detected if its name ends with "mixin" (case insensitive). 425 | ignore-mixin-members=yes 426 | 427 | # Tells whether to warn about missing members when the owner of the attribute 428 | # is inferred to be None. 429 | ignore-none=yes 430 | 431 | # This flag controls whether pylint should warn about no-member and similar 432 | # checks whenever an opaque object is returned when inferring. The inference 433 | # can return multiple potential results while evaluating a Python object, but 434 | # some branches might not be evaluated, which results in partial inference. 
In 435 | # that case, it might be useful to still emit no-member and other checks for 436 | # the rest of the inferred objects. 437 | ignore-on-opaque-inference=yes 438 | 439 | # List of class names for which member attributes should not be checked (useful 440 | # for classes with dynamically set attributes). This supports the use of 441 | # qualified names. 442 | ignored-classes=optparse.Values,thread._local,_thread._local 443 | 444 | # List of module names for which member attributes should not be checked 445 | # (useful for modules/projects where namespaces are manipulated during runtime 446 | # and thus existing member attributes cannot be deduced by static analysis). It 447 | # supports qualified module names, as well as Unix pattern matching. 448 | ignored-modules= 449 | 450 | # Show a hint with possible names when a member name was not found. The aspect 451 | # of finding the hint is based on edit distance. 452 | missing-member-hint=yes 453 | 454 | # The minimum edit distance a name should have in order to be considered a 455 | # similar match for a missing member name. 456 | missing-member-hint-distance=1 457 | 458 | # The total number of similar names that should be taken in consideration when 459 | # showing a hint for a missing member. 460 | missing-member-max-choices=1 461 | 462 | # List of decorators that change the signature of a decorated function. 463 | signature-mutators= 464 | 465 | 466 | [VARIABLES] 467 | 468 | # List of additional names supposed to be defined in builtins. Remember that 469 | # you should avoid defining new builtins when possible. 470 | additional-builtins= 471 | 472 | # Tells whether unused global variables should be treated as a violation. 473 | allow-global-unused-variables=yes 474 | 475 | # List of strings which can identify a callback function by name. A callback 476 | # name must start or end with one of those strings. 477 | callbacks=cb_, 478 | _cb 479 | 480 | # A regular expression matching the name of dummy variables (i.e. expected to 481 | # not be used). 482 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 483 | 484 | # Argument names that match this expression will be ignored. Default to name 485 | # with leading underscore. 486 | ignored-argument-names=_.*|^ignored_|^unused_ 487 | 488 | # Tells whether we should check for unused import in __init__ files. 489 | init-import=no 490 | 491 | # List of qualified module names which can have objects that can redefine 492 | # builtins. 493 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 494 | 495 | 496 | [CLASSES] 497 | 498 | # List of method names used to declare (i.e. assign) instance attributes. 499 | defining-attr-methods=__init__, 500 | __new__, 501 | setUp, 502 | __post_init__ 503 | 504 | # List of member names, which should be excluded from the protected access 505 | # warning. 506 | exclude-protected=_asdict, 507 | _fields, 508 | _replace, 509 | _source, 510 | _make 511 | 512 | # List of valid names for the first argument in a class method. 513 | valid-classmethod-first-arg=cls 514 | 515 | # List of valid names for the first argument in a metaclass class method. 516 | valid-metaclass-classmethod-first-arg=cls 517 | 518 | 519 | [DESIGN] 520 | 521 | # Maximum number of arguments for function / method. 522 | max-args=5 523 | 524 | # Maximum number of attributes for a class (see R0902). 525 | max-attributes=7 526 | 527 | # Maximum number of boolean expressions in an if statement (see R0916). 
528 | max-bool-expr=5 529 | 530 | # Maximum number of branch for function / method body. 531 | max-branches=12 532 | 533 | # Maximum number of locals for function / method body. 534 | max-locals=15 535 | 536 | # Maximum number of parents for a class (see R0901). 537 | max-parents=7 538 | 539 | # Maximum number of public methods for a class (see R0904). 540 | max-public-methods=20 541 | 542 | # Maximum number of return / yield for function / method body. 543 | max-returns=6 544 | 545 | # Maximum number of statements in function / method body. 546 | max-statements=50 547 | 548 | # Minimum number of public methods for a class (see R0903). 549 | min-public-methods=2 550 | 551 | 552 | [IMPORTS] 553 | 554 | # List of modules that can be imported at any level, not just the top level 555 | # one. 556 | allow-any-import-level= 557 | 558 | # Allow wildcard imports from modules that define __all__. 559 | allow-wildcard-with-all=no 560 | 561 | # Analyse import fallback blocks. This can be used to support both Python 2 and 562 | # 3 compatible code, which means that the block might have code that exists 563 | # only in one or another interpreter, leading to false positives when analysed. 564 | analyse-fallback-blocks=no 565 | 566 | # Deprecated modules which should not be used, separated by a comma. 567 | deprecated-modules=optparse,tkinter.tix 568 | 569 | # Create a graph of external dependencies in the given file (report RP0402 must 570 | # not be disabled). 571 | ext-import-graph= 572 | 573 | # Create a graph of every (i.e. internal and external) dependencies in the 574 | # given file (report RP0402 must not be disabled). 575 | import-graph= 576 | 577 | # Create a graph of internal dependencies in the given file (report RP0402 must 578 | # not be disabled). 579 | int-import-graph= 580 | 581 | # Force import order to recognize a module as part of the standard 582 | # compatibility libraries. 583 | known-standard-library= 584 | 585 | # Force import order to recognize a module as part of a third party library. 586 | known-third-party=enchant 587 | 588 | # Couples of modules and preferred modules, separated by a comma. 589 | preferred-modules= 590 | 591 | 592 | [EXCEPTIONS] 593 | 594 | # Exceptions that will emit a warning when being caught. Defaults to 595 | # "BaseException, Exception". 
596 | overgeneral-exceptions=BaseException, 597 | Exception 598 | -------------------------------------------------------------------------------- /AzureFunctions.Python/.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-azuretools.vscode-azurefunctions", 4 | "ms-python.python" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /AzureFunctions.Python/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Attach to Python Functions", 6 | "type": "python", 7 | "request": "attach", 8 | "port": 9091, 9 | "preLaunchTask": "func: host start" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /AzureFunctions.Python/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "azureFunctions.deploySubpath": ".", 3 | "azureFunctions.scmDoBuildDuringDeployment": true, 4 | "azureFunctions.pythonVenv": ".venv", 5 | "azureFunctions.projectLanguage": "Python", 6 | "azureFunctions.projectRuntime": "~3", 7 | "debug.internalConsoleOptions": "neverOpen", 8 | "python.linting.pylintEnabled": true, 9 | "python.linting.enabled": true, 10 | "python.linting.lintOnSave": true 11 | } -------------------------------------------------------------------------------- /AzureFunctions.Python/.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "type": "func", 6 | "command": "host start", 7 | "problemMatcher": "$func-watch", 8 | "isBackground": true, 9 | "dependsOn": "pipInstall" 10 | }, 11 | { 12 | "label": "pipInstall", 13 | "type": "shell", 14 | "osx": { 15 | "command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt" 16 | }, 17 | "windows": { 18 | "command": "${config:azureFunctions.pythonVenv}\\Scripts\\python -m pip install -r requirements.txt" 19 | }, 20 | "linux": { 21 | "command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt" 22 | }, 23 | "problemMatcher": [] 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /AzureFunctions.Python/README.md: -------------------------------------------------------------------------------- 1 | # Batch Validation 2 | 3 | This folder contains sample Python Azure Functions used to validate a batch of .csv files stored in Azure Blob Storage. The path for each file in the batch looks like `{subpath}/{customer}_{date}_{time}_{type}.csv` (e.g. `input_data/cust1_20171010_1112_type1.csv`). 4 | 5 | The `process_batches` function is timer-triggered: it gets the list of batches that need to be validated and sends their information to an Azure Storage queue. 6 | The `validate_batch` function is queue-triggered and validates each batch sent to the queue. The sample performs basic validation on each .csv file: the expected file encoding, the expected number of columns, and a check that every field is enclosed in double quotes. The batch files are then moved into a `valid` or `invalid` folder, depending on the result of the validation.
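To make those rules concrete, the row-level checks boil down to something like the sketch below. This is a simplified, illustrative rendition (the name `row_is_valid` and the truncated column mapping are hypothetical; the actual logic, the full per-type column counts, and the encoding handling live in `validate_batch/batchvalidation.py`):

```python
# Illustrative sketch of the checks described above, not the exact sample code.
EXPECTED_COLUMNS = {'type1': 4, 'type2': 4, 'type10': 3}  # subset of the real mapping

def row_is_valid(row: str, file_type: str) -> bool:
    """A row is valid when it has the expected column count and every field is double-quoted."""
    fields = row.rstrip('\r\n').split(',')
    if len(fields) != EXPECTED_COLUMNS[file_type]:
        return False
    return all(field.startswith('"') and field.endswith('"') for field in fields)
```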
7 | 8 | ## Development details 9 | 10 | Code in this folder has been developed with [Visual Studio Code](https://code.visualstudio.com/), using the [Azure Functions extension](https://marketplace.visualstudio.com/items?itemName=ms-azuretools.vscode-azurefunctions) and the [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python) for Visual Studio Code (more details in the [Useful links section](#useful-links) below), but you may use any other editor or IDE of your liking. 11 | 12 | Azure Functions and Python are cross-platform, so you should be able to use this code on Windows, Linux, or macOS. 13 | 14 | ## Configure your environment 15 | 16 | See the official documentation for details: [Configure your environment](https://docs.microsoft.com/en-us/azure/azure-functions/functions-create-first-function-vs-code?pivots=programming-language-python#configure-your-environment). 17 | 18 | ### Visual Studio Code and Python virtual environment 19 | 20 | To run or debug the code, the Visual Studio Code settings found in the `.vscode` folder depend on the existence of a `.venv` folder containing a Python virtual environment. 21 | 22 | While in this folder, you may create such an environment with `python -m venv ./.venv`. 23 | 24 | ### TLS/SSL error when using pip in a Python virtual environment (Windows) 25 | 26 | If the virtual environment's pip fails to install packages with an error like the following: 27 | 28 | ```text 29 | pip is configured with locations that require TLS/SSL, however the ssl module in Python is not available. 30 | ``` 31 | 32 | and you have e.g. [Anaconda](https://www.anaconda.com/) installed, add the following paths to the Windows `Path` environment variable: 33 | 34 | ```text 35 | <<your Anaconda path>>\Anaconda3 36 | <<your Anaconda path>>\Anaconda3\scripts 37 | <<your Anaconda path>>\Anaconda3\library\bin 38 | ``` 39 | 40 | ## Running the project locally 41 | 42 | Make sure you have a `local.settings.json` file. See the `local.settings.sample.json` file for what this should look like. 43 | 44 | If you are using Visual Studio Code, just run the code with `Run (Ctrl+Shift+D) > Attach to Python Functions`. It will activate the Python virtual environment and install the required Python packages from the `requirements.txt` file. 45 | 46 | If you are in the console, don't forget to activate the Python virtual environment first (with `.venv\scripts\activate` on Windows or `source .venv/bin/activate` on Linux) and install the required Python packages (`pip install -r requirements.txt`). Then run the functions with `func start`.
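At runtime the functions resolve these settings with `os.getenv`, the same way whether they run locally or in Azure. For reference, this mirrors the setup code in `process_batches/__init__.py` and `validate_batch/__init__.py`:

```python
import os

from azure.storage.blob import BlobServiceClient

# Locally these values come from local.settings.json; in Azure they come from
# the Function App's application settings.
blob_service_client = BlobServiceClient.from_connection_string(os.getenv('DataStorage'))
container_client = blob_service_client.get_container_client(os.getenv('DataContainer'))
```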
47 | 48 | ## Useful links 49 | 50 | Azure Functions 51 | 52 | - [Work with Azure Functions Core Tools](https://docs.microsoft.com/en-us/azure/azure-functions/functions-run-local) 53 | - [Azure Functions Python developer guide](https://docs.microsoft.com/en-us/azure/azure-functions/functions-reference-python) 54 | 55 | Azure Functions with Visual Studio Code 56 | 57 | - [Quickstart: Create a Python function in Azure using Visual Studio Code](https://docs.microsoft.com/en-us/azure/azure-functions/functions-create-first-function-vs-code?pivots=programming-language-python) 58 | 59 | Azure Blob Storage 60 | 61 | - [Quickstart: Manage blobs with Python v12 SDK](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python) 62 | - [Azure Storage Blobs client library for Python - Version 12.3.0](https://docs.microsoft.com/en-us/azure/developer/python/sdk/storage/storage-blob-readme?view=storage-py-v12) 63 | - [Azure Storage Blob client library for Python Samples](https://docs.microsoft.com/en-us/samples/azure/azure-sdk-for-python/storage-blob-samples/) 64 | -------------------------------------------------------------------------------- /AzureFunctions.Python/common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module with classes common to all Azure Functions. 3 | """ 4 | 5 | from .blobstorageclient import * 6 | from .batch import * 7 | -------------------------------------------------------------------------------- /AzureFunctions.Python/common/batch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Batch of files in Azure Blob Storage. 3 | """ 4 | from datetime import datetime 5 | from enum import Enum, auto 6 | import json 7 | import jsonpickle 8 | 9 | # Types of files in a batch 10 | TYPES = ['type1', 'type2', 'type3', 'type4', 'type5', 'type7', 'type8', 'type9', 'type10'] 11 | 12 | # Index of TYPES array for the reference type (used to set the status of the batch in its blob 13 | # metadata) 14 | REF_TYPE = 0 15 | 16 | class Status(Enum): 17 | """ 18 | Status of the validation of a batch. 19 | This status will be set in the metadata of the blob referenced by TYPES[REF_TYPE] 20 | """ 21 | ERROR = auto() 22 | RUNNING = auto() 23 | VALID = auto() 24 | INVALID = auto() 25 | 26 | class Batch: 27 | """ 28 | Class representing a batch of files in Azure Blob Storage. 29 | A batch of files comprises several .csv files, one per type in the TYPES array. 30 | Path for csv files: 31 | {subpath}/{customer}_{date}_{time}_{type}.csv 32 | Example: 33 | input_data/cust1_20171010_1112_type1.csv 34 | """ 35 | 36 | def __init__(self, customer: str, timestamp: datetime): 37 | self.__customer = customer 38 | self.__timestamp = timestamp 39 | self.__types = {} 40 | self.__status = None 41 | 42 | @property 43 | def customer(self): 44 | """ 45 | Customer name. 46 | """ 47 | return self.__customer 48 | 49 | @property 50 | def timestamp(self): 51 | """ 52 | Datetime when the batch was created. 53 | """ 54 | return self.__timestamp 55 | 56 | @property 57 | def types(self): 58 | """ 59 | Dictionary of .csv files (should have one per type in the TYPES array). 60 | """ 61 | return self.__types 62 | 63 | @types.setter 64 | def types(self, types: dict): 65 | self.__types = types 66 | 67 | @property 68 | def status(self): 69 | """ 70 | Status of the batch validation.
71 | """ 72 | return self.__status 73 | 74 | @status.setter 75 | def status(self, status: Status): 76 | self.__status = status 77 | 78 | def is_complete(self): 79 | """ 80 | True if all types in TYPES array are present in the batch. 81 | """ 82 | for file_type in TYPES: 83 | if not self.__types.get(file_type): 84 | return False 85 | return True 86 | 87 | def serialize(self) -> str: 88 | """ 89 | Serializes the contents of this class as json. 90 | """ 91 | encoded_batch = jsonpickle.encode(self, unpicklable=False) 92 | return json.dumps(encoded_batch, indent=4) 93 | -------------------------------------------------------------------------------- /AzureFunctions.Python/common/blobstorageclient.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interaction with Azure Blob Storage. 3 | """ 4 | 5 | import re 6 | import datetime 7 | import logging 8 | import time 9 | from enum import Enum 10 | from io import TextIOWrapper, BytesIO 11 | 12 | import dateutil.parser as dt 13 | from azure.storage.blob import ContainerClient, BlobClient 14 | 15 | from .batch import Batch, Status, TYPES, REF_TYPE 16 | 17 | STATUS = 'status' 18 | CSV = 'csv' 19 | DEFAULT_ENCODING = 'UTF-8-SIG' 20 | 21 | class NamePart(Enum): 22 | """ 23 | This class gives meaning to the Path Segments of Azure Blob Storage Urls of the files. 24 | """ 25 | Customer = 0 26 | Date = 1 27 | Time = 2 28 | Type = 3 29 | 30 | class BlobStorageClient(): 31 | """ 32 | Class to interact with Azure Blob Storage. 33 | """ 34 | 35 | def __init__(self, container_client: ContainerClient): 36 | self.__container_client = container_client 37 | 38 | def get_batches(self, subpath: str = None) -> [Batch]: 39 | """ 40 | Get batches from Azure Blob Storage. 41 | Valid paths for the csv files in a batch: 42 | {subpath}/{customer}_{date}_{time}_{type}.csv 43 | Example: 44 | input_data/cust1_20171010_1112_type1.csv 45 | """ 46 | batches = {} 47 | for blob in self.__container_client.list_blobs(name_starts_with=subpath, 48 | include='metadata'): 49 | try: 50 | # Remove subpath 51 | full_name = re.sub(f'^{subpath}/', '', blob.name) 52 | full_name_parts = full_name.rsplit('.', 1) 53 | 54 | # Check file extension 55 | extension = full_name_parts[1].lower() 56 | if extension != CSV: 57 | # Ignore invalid extensions 58 | continue 59 | 60 | # Get customer 61 | name = full_name_parts[0] 62 | name_parts = name.rsplit('_') 63 | customer = name_parts[NamePart.Customer.value] 64 | 65 | # Get timestamp 66 | file_date = name_parts[NamePart.Date.value] 67 | file_time = name_parts[NamePart.Time.value] 68 | timestamp = self.__get_timestamp(file_date, file_time) 69 | 70 | # Check file type 71 | file_type = name_parts[NamePart.Type.value] 72 | if file_type not in TYPES: 73 | # Ignore invalid types 74 | continue 75 | 76 | # Create batch for customer & timestamp and add it to dictionary if needed 77 | if not batches.get(customer): 78 | batches[customer] = {} 79 | if not batches[customer].get(timestamp): 80 | batches[customer][timestamp] = Batch(customer, timestamp) 81 | 82 | # Set blob path for file type 83 | batches[customer][timestamp].types[file_type] = blob.name 84 | 85 | # Set batch status from the reference's type blob metadata 86 | if file_type == TYPES[REF_TYPE]: 87 | batches[customer][timestamp].status = self.__get_blob_status(blob.metadata) 88 | 89 | except Exception as ex: 90 | # Ignore invalid blobs 91 | logging.exception("EXCEPTION while listing blob %s", blob.name, exc_info=ex) 92 | 93 | # Return the batches ready for 
94 | ready_batches = [] 95 | for customer_batches in batches.values(): 96 | for batch in customer_batches.values(): 97 | if batch.is_complete() and self.__batch_needs_validation(batch): 98 | ready_batches.append(batch) 99 | 100 | return ready_batches 101 | 102 | def save_batch_status(self, batch: Batch): 103 | """ 104 | Save the status of a batch to the metadata of its reference type blob (TYPES[REF_TYPE]). 105 | """ 106 | ref_type = batch.types.get(TYPES[REF_TYPE]) if batch else None 107 | status = {STATUS: batch.status.name} if batch.status is not None else None 108 | if ref_type: 109 | self.__container_client.get_blob_client(ref_type).set_blob_metadata(status) 110 | 111 | def download_blob_content(self, path: str) -> TextIOWrapper: 112 | """ 113 | Download a file as text from Azure Blob Storage. 114 | """ 115 | blob: BlobClient = self.__container_client.get_blob_client(path) 116 | stream_downloader = blob.download_blob() 117 | encoding = stream_downloader.properties.content_settings.content_encoding 118 | encoding = encoding if encoding else DEFAULT_ENCODING 119 | contents = stream_downloader.readall() 120 | stream = TextIOWrapper(BytesIO(contents), encoding=encoding) 121 | return stream 122 | 123 | def move_blob(self, source_path: str, target_folder: str): 124 | """ 125 | Move a blob in the container to a target folder. 126 | """ 127 | target_path = f'{target_folder}/{source_path}' 128 | target_blob: BlobClient = self.__container_client.get_blob_client(target_path) 129 | source_url = f'{self.__container_client.url}/{source_path}' 130 | target_blob.start_copy_from_url(source_url) 131 | self.__wait_for_copy(target_blob) 132 | 133 | source_blob: BlobClient = self.__container_client.get_blob_client(source_path) 134 | source_blob.delete_blob() 135 | 136 | def __wait_for_copy(self, blob: BlobClient): 137 | """ 138 | Wait for the start_copy_from_url method to complete, 139 | as per: https://github.com/Azure/azure-sdk-for-python/issues/7043 140 | """ 141 | count = 0 142 | props = blob.get_blob_properties() 143 | while props.copy.status == 'pending': 144 | count = count + 1 145 | if count > 10: 146 | raise TimeoutError('Timed out waiting for async copy to complete.') 147 | time.sleep(5) 148 | props = blob.get_blob_properties() 149 | return props 150 | 151 | def __get_blob_status(self, blob_metadata: dict) -> str: 152 | return None if not blob_metadata else blob_metadata.get(STATUS) 153 | 154 | def __batch_needs_validation(self, batch: Batch) -> bool: 155 | return not batch.status or batch.status == Status.ERROR.name 156 | 157 | def __get_timestamp(self, f_date: str, f_time: str) -> datetime.datetime: 158 | """ 159 | Date looks like this: YYYYMMDD and time looks like this: HHMM 160 | dt.parse requires the timestamp to be like this: "2020-03-27T08:49:30.000Z" 161 | """ 162 | timestamp = f'{f_date[:4]}-{f_date[4:6]}-{f_date[6:8]}T{f_time[:2]}:{f_time[2:4]}:00.000Z' 163 | return dt.parse(timestamp) 164 | -------------------------------------------------------------------------------- /AzureFunctions.Python/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 10 | }, 11 | "extensionBundle": { 12 | "id": "Microsoft.Azure.Functions.ExtensionBundle", 13 | "version": "[1.*, 2.0.0)" 14 | } 15 | } 16 | --------------------------------------------------------------------------------
/AzureFunctions.Python/local.settings.sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "IsEncrypted": false, 3 | "Values": { 4 | "AzureWebJobsStorage": "", 5 | "DataStorage": "", 6 | "DataContainer": "data", 7 | "DataSubpath": "subpath", 8 | "FUNCTIONS_WORKER_RUNTIME": "python" 9 | } 10 | } -------------------------------------------------------------------------------- /AzureFunctions.Python/process_batches/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Timer-triggered Azure Function that finds batches of files in Azure Blob Storage and queues 3 | them for validation. 4 | """ 5 | 6 | import datetime 7 | import logging 8 | import os 9 | from typing import List 10 | 11 | import azure.functions as func 12 | from azure.storage.blob import BlobServiceClient 13 | from ..common import BlobStorageClient, Status 14 | 15 | def main(mytimer: func.TimerRequest, myqueue: func.Out[List[str]]) -> None: 16 | """ 17 | Entry point for this Azure Function. 18 | """ 19 | 20 | utc_timestamp = datetime.datetime.utcnow().replace( 21 | tzinfo=datetime.timezone.utc).isoformat() 22 | 23 | if mytimer.past_due: 24 | logging.info('The timer is past due!') 25 | 26 | logging.info('Python timer trigger function ran at %s', utc_timestamp) 27 | 28 | try: 29 | # Get all batches ready to be validated from the storage 30 | blob_service_client = BlobServiceClient.from_connection_string(os.getenv('DataStorage')) 31 | container_client = blob_service_client.get_container_client(os.getenv('DataContainer')) 32 | blob_client: BlobStorageClient = BlobStorageClient(container_client) 33 | batches = blob_client.get_batches(os.getenv('DataSubpath')) 34 | 35 | if not batches: 36 | logging.warning('No new batches to validate') 37 | return 38 | 39 | # Send batches to the validation queue 40 | for batch in batches: 41 | batch.status = Status.RUNNING 42 | blob_client.save_batch_status(batch) 43 | logging.info('Sending batch %s > %s to the validation queue', 44 | batch.customer, batch.timestamp) 45 | 46 | myqueue.set([batch.serialize() for batch in batches]) 47 | logging.info('%s new batches sent to the validation queue', len(batches)) 48 | 49 | except Exception as ex: 50 | logging.exception('EXCEPTION while getting batches', exc_info=ex) 51 | -------------------------------------------------------------------------------- /AzureFunctions.Python/process_batches/function.json: -------------------------------------------------------------------------------- 1 | { 2 | "scriptFile": "__init__.py", 3 | "bindings": [ 4 | { 5 | "name": "mytimer", 6 | "type": "timerTrigger", 7 | "direction": "in", 8 | "schedule": "0 */1 * * * *" 9 | }, 10 | { 11 | "name": "myqueue", 12 | "type": "queue", 13 | "direction": "out", 14 | "queueName": "batches" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /AzureFunctions.Python/proxies.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json.schemastore.org/proxies", 3 | "proxies": {} 4 | } 5 | -------------------------------------------------------------------------------- /AzureFunctions.Python/requirements.txt: -------------------------------------------------------------------------------- 1 | # DO NOT include azure-functions-worker in this file 2 | # The Python Worker is managed by Azure Functions platform 3 | # Manually managing azure-functions-worker may cause
unexpected issues 4 | 5 | azure-functions==1.3.0 6 | azure-storage-blob==12.4.0 7 | python-dateutil==2.8.1 8 | jsonpickle==1.4.1 9 | -------------------------------------------------------------------------------- /AzureFunctions.Python/validate_batch/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Queue-triggered Azure Function that processes a single batch of files from Azure Blob Storage. 3 | """ 4 | 5 | import logging 6 | import os 7 | import json 8 | import jsonpickle 9 | 10 | import azure.functions as func 11 | from azure.storage.blob import BlobServiceClient 12 | 13 | from ..common import BlobStorageClient 14 | from .batchvalidation import BatchValidation 15 | 16 | 17 | def main(msg: func.QueueMessage) -> None: 18 | """ 19 | Entry point for this Azure Function. 20 | """ 21 | 22 | message_content: str = msg.get_body().decode('utf-8') 23 | logging.info('Python queue trigger function processed a queue item: %s', message_content) 24 | 25 | try: 26 | # Extract batch info from queue message 27 | decoded_batch = jsonpickle.decode(message_content) 28 | json_value: dict = json.loads(decoded_batch) 29 | 30 | # Validate batch 31 | blob_service_client = BlobServiceClient.from_connection_string(os.getenv('DataStorage')) 32 | container_client = blob_service_client.get_container_client(os.getenv('DataContainer')) 33 | blob_client: BlobStorageClient = BlobStorageClient(container_client) 34 | batch_validation: BatchValidation = BatchValidation(blob_client, json_value) 35 | batch_validation.validate() 36 | 37 | logging.info('Done validating batch') 38 | 39 | except Exception as ex: 40 | logging.exception('EXCEPTION while processing queue item: %s', message_content, exc_info=ex) 41 | -------------------------------------------------------------------------------- /AzureFunctions.Python/validate_batch/batchvalidation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Validation of a batch of files. 3 | """ 4 | 5 | from datetime import datetime, time 6 | import logging 7 | import os 8 | from dateutil import parser 9 | from dateutil.tz import tzutc 10 | 11 | from ..common import Batch, Status, BlobStorageClient 12 | 13 | INVALID_FOLDER = "invalid" 14 | VALID_FOLDER = "valid" 15 | VALID_ENCODING = "UTF-8-SIG" 16 | COLUMNS = { 17 | 'type1':4, 18 | 'type2':4, 19 | 'type3':14, 20 | 'type4':3, 21 | 'type5':2, 22 | 'type7':23, 23 | 'type8':21, 24 | 'type9':5, 25 | 'type10':3} 26 | COLUMN_SEPARATOR = ',' 27 | ENCLOSING = '"' 28 | 29 | class BatchValidation: 30 | """ 31 | Class representing a batch validation. 32 | """ 33 | 34 | def __init__(self, blob_client: BlobStorageClient, data: dict): 35 | self.__blob_client = blob_client 36 | default_date = datetime.combine(datetime.now(), time(0, tzinfo=tzutc())) 37 | customer = data['_Batch__customer'] 38 | timestamp = parser.parse(data['_Batch__timestamp'], default=default_date) 39 | self.__batch = Batch(customer, timestamp) 40 | self.__batch.status = data['_Batch__status'] 41 | self.__batch.types = data['_Batch__types'] 42 | 43 | def validate(self): 44 | """ 45 | Validates a batch of files.
46 | """ 47 | errors = [] 48 | try: 49 | customer = self.__batch.customer 50 | timestamp = self.__batch.timestamp 51 | for item in self.__batch.types.items(): 52 | stream = self.__blob_client.download_blob_content(item[1]) 53 | 54 | # Check the encoding 55 | if stream.encoding != VALID_ENCODING: 56 | errors.append(f'INVALID ENCODING in batch {customer} > {timestamp}') 57 | continue 58 | 59 | # Check the content of the csv file 60 | for line in stream: 61 | line = line.rstrip(os.linesep) 62 | 63 | # Check the number of columns 64 | columns = line.split(COLUMN_SEPARATOR) 65 | if len(columns) != COLUMNS[item[0]]: 66 | errors.append(f'INVALID COLUMNS in batch {customer} > {timestamp}') 67 | break 68 | 69 | # Check if each field is enclosed in double quotes 70 | for field in columns: 71 | if not field.startswith(ENCLOSING) or not field.endswith(ENCLOSING): 72 | errors.append(f'INVALID ENCLOSING in batch {customer} > {timestamp}') 73 | break 74 | 75 | # Set status 76 | self.__set_validation_status(Status.VALID if len(errors) == 0 else Status.INVALID) 77 | 78 | # Log errors 79 | for error in errors: 80 | logging.error(error) 81 | 82 | except Exception as ex: 83 | self.__set_validation_status(Status.ERROR) 84 | logging.exception("EXCEPTION while validating batch %s > %s", 85 | self.__batch.customer, self.__batch.timestamp, exc_info=ex) 86 | 87 | def __set_validation_status(self, status: Status): 88 | self.__batch.status = status 89 | self.__blob_client.save_batch_status(self.__batch) 90 | if status == Status.ERROR: 91 | return 92 | self.__move_batch(VALID_FOLDER if status == Status.VALID else INVALID_FOLDER) 93 | 94 | def __move_batch(self, target_folder: str): 95 | for file_type in self.__batch.types.values(): 96 | self.__blob_client.move_blob(file_type, target_folder) 97 | -------------------------------------------------------------------------------- /AzureFunctions.Python/validate_batch/function.json: -------------------------------------------------------------------------------- 1 | { 2 | "scriptFile": "__init__.py", 3 | "bindings": [ 4 | { 5 | "name": "msg", 6 | "type": "queueTrigger", 7 | "direction": "in", 8 | "queueName": "batches" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /AzureFunctions.v1.Durable/AzureFunctions.v1.Durable.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net471 4 | 5 | 6 | TRACE;FUNCTIONS_V1 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | PreserveNewest 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | PreserveNewest 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /AzureFunctions.v1.Durable/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "AzureFunctions.Durable.v1": { 4 | "commandName": "Project" 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /AzureFunctions.v1.Durable/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "logLevel": { 5 | "default": "Trace" 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /AzureFunctions.v1/AzureFunctions.v1.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net471 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Never 13 | 
14 | 15 | 16 | 17 | PreserveNewest 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | PreserveNewest 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /AzureFunctions.v1/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "logLevel": { 5 | "default": "Trace" 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /AzureFunctions.v3.Durable/AzureFunctions.v3.Durable.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | netcoreapp3.1 4 | v3 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | PreserveNewest 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | PreserveNewest 26 | 27 | 28 | -------------------------------------------------------------------------------- /AzureFunctions.v3.Durable/DurableContextExtensions.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Azure.WebJobs; 2 | using Microsoft.Azure.WebJobs.Extensions.DurableTask; 3 | using Microsoft.Extensions.Logging; 4 | using Newtonsoft.Json.Linq; 5 | 6 | namespace FileValidation 7 | { 8 | static class DurableContextExtensions 9 | { 10 | #if FUNCTIONS_V1 11 | public static void Log(this DurableOrchestrationContext context, ILogger log, string message, bool onlyIfNotReplaying = true) 12 | { 13 | if (!onlyIfNotReplaying || !context.IsReplaying) 14 | { 15 | log.LogWarning(message); 16 | } 17 | } 18 | 19 | public static void Log(this DurableOrchestrationClient _, ILogger log, string message) => log.LogWarning(message); 20 | 21 | public static JToken GetInputAsJson(this DurableActivityContextBase ctx) => ctx.GetInput<JToken>(); 22 | 23 | public static JToken GetInputAsJson(this DurableOrchestrationContextBase ctx) => ctx.GetInput<JToken>(); 24 | #else 25 | public static void Log(this IDurableOrchestrationContext context, ILogger log, string message, bool onlyIfNotReplaying = true) 26 | { 27 | if (!onlyIfNotReplaying || !context.IsReplaying) 28 | { 29 | log.LogWarning(message); 30 | } 31 | } 32 | 33 | public static void Log(this IDurableClient _, ILogger log, string message) => log.LogWarning(message); 34 | 35 | public static JToken GetInputAsJson(this IDurableActivityContext ctx) => ctx.GetInput<JToken>(); 36 | 37 | public static JToken GetInputAsJson(this IDurableOrchestrationContext ctx) => ctx.GetInput<JToken>(); 38 | #endif 39 | 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /AzureFunctions.v3.Durable/FunctionEnsureAllFiles.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | using System.Linq; 3 | using System.Threading.Tasks; 4 | using System.Collections.Generic; 5 | using Azure.Messaging.EventGrid; 6 | using Microsoft.Azure.WebJobs; 7 | using Microsoft.Azure.WebJobs.Extensions.DurableTask; 8 | using Microsoft.Extensions.Logging; 9 | 10 | namespace FileValidation 11 | { 12 | public static class FunctionEnsureAllFiles 13 | { 14 | [FunctionName("EnsureAllFiles")] 15 | public static async Task Run([OrchestrationTrigger] IDurableOrchestrationContext context, ILogger log) 16 | { 17 | if (!context.IsReplaying) 18 | { 19 | log.LogTrace($@"EnsureAllFiles STARTING - InstanceId: {context.InstanceId}"); 20 | } 21 | else 22 | { 23 | log.LogTrace($@"EnsureAllFiles REPLAYING"); 24 | } 25 | 26 | var eventGridSoleItem = context.GetInput<EventGridEvent>(); 27 | 28 | CustomerBlobAttributes
newCustomerFile = Helpers.ParseEventGridPayload(eventGridSoleItem, log); 29 | if (newCustomerFile == null) 30 | { // The request either wasn't valid (filename couldn't be parsed) or not applicable (put into a folder other than /inbound) 31 | return; 32 | } 33 | 34 | var expectedFiles = Helpers.GetExpectedFilesForCustomer(); 35 | var filesStillWaitingFor = new HashSet<string>(expectedFiles); 36 | var filename = newCustomerFile.Filename; 37 | 38 | while (filesStillWaitingFor.Any()) 39 | { 40 | filesStillWaitingFor.Remove(Path.GetFileNameWithoutExtension(filename).Split('_').Last()); 41 | if (filesStillWaitingFor.Count == 0) 42 | { 43 | break; 44 | } 45 | 46 | log.LogTrace($@"Still waiting for more files... Still need {string.Join(", ", filesStillWaitingFor)} for customer {newCustomerFile.CustomerName}, batch {newCustomerFile.BatchPrefix}"); 47 | 48 | filename = await context.WaitForExternalEvent<string>(@"newfile"); 49 | log.LogTrace($@"Got new file via event: {filename}"); 50 | } 51 | 52 | // Verify that this prefix isn't already in the lock table for processing 53 | log.LogInformation(@"Got all the files! Moving on..."); 54 | 55 | // call next step in functions with the prefix so it knows what to go grab 56 | await context.CallActivityAsync(@"ValidateFileSet", new FilesetValidationRequest 57 | { 58 | Prefix = $@"{newCustomerFile.ContainerName}/inbound/{newCustomerFile.BatchPrefix}", 59 | ExpectedFiles = expectedFiles 60 | }); 61 | } 62 | 63 | class BlobFilenameVsDatabaseFileMaskComparer : IEqualityComparer<string> 64 | { 65 | public bool Equals(string x, string y) => y.Contains(x); 66 | 67 | public int GetHashCode(string obj) => obj.GetHashCode(); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /AzureFunctions.v3.Durable/FunctionValidateFileSet.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Threading.Tasks; 4 | using Microsoft.Azure.WebJobs; 5 | using Microsoft.Azure.WebJobs.Extensions.DurableTask; 6 | using Microsoft.Extensions.Logging; 7 | using Microsoft.WindowsAzure.Storage; 8 | 9 | namespace FileValidation 10 | { 11 | public static class FunctionValidateFileSet 12 | { 13 | [FunctionName(@"ValidateFileSet")] 14 | public static async Task<bool> Run([ActivityTrigger] FilesetValidationRequest payload, ILogger log) 15 | { 16 | log.LogTrace(@"ValidateFileSet run."); 17 | if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out _)) 18 | { 19 | throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); 20 | } 21 | 22 | var prefix = payload.Prefix; // This is the entire path w/ prefix for the file set 23 | 24 | return await Helpers.DoValidationAsync(prefix, log); 25 | } 26 | 27 | } 28 | 29 | public class FilesetValidationRequest 30 | { 31 | public string Prefix { get; set; } 32 | 33 | public IEnumerable<string> ExpectedFiles { get; set; } 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /AzureFunctions.v3.Durable/Orchestrator.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Net; 3 | using System.Net.Http; 4 | using Azure.Messaging.EventGrid; 5 | using Azure.Messaging.EventGrid.SystemEvents; 6 | using Microsoft.Azure.WebJobs; 7 | using Microsoft.Azure.WebJobs.Extensions.DurableTask; 8 | using
Microsoft.Extensions.Logging; 10 | 11 | namespace FileValidation 12 | { 13 | public static class Orchestrator 14 | { 15 | [FunctionName("Orchestrator")] 16 | public static async System.Threading.Tasks.Task RunAsync([HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequestMessage req, [DurableClient] IDurableClient starter, ILogger log) 17 | { 18 | var reader = await req.Content.ReadAsStringAsync(); 19 | var evt = EventGridEvent.Parse(BinaryData.FromString(reader)); 20 | if (evt == null) 21 | { 22 | return req.CreateCompatibleResponse(HttpStatusCode.BadRequest, @"Expecting only one item in the Event Grid message"); 23 | } 24 | 25 | if (evt.TryGetSystemEventData(out object eventData)) 26 | { 27 | if (eventData is SubscriptionValidationEventData subscriptionValidationEventData) 28 | { 29 | log.LogTrace(@"Event Grid Validation event received."); 30 | return req.CreateCompatibleResponse(HttpStatusCode.OK, $"{{ \"validationResponse\" : \"{subscriptionValidationEventData.ValidationCode}\" }}"); 31 | } 32 | } 33 | 34 | CustomerBlobAttributes newCustomerFile = Helpers.ParseEventGridPayload(evt, log); 35 | if (newCustomerFile == null) 36 | { // The request either wasn't valid (filename couldn't be parsed) or not applicable (put in to a folder other than /inbound) 37 | return req.CreateCompatibleResponse(HttpStatusCode.NoContent); 38 | } 39 | 40 | string customerName = newCustomerFile.CustomerName, name = newCustomerFile.Filename; 41 | log.LogInformation($@"Processing new file. customer: {customerName}, filename: {name}"); 42 | 43 | // get the prefix for the name so we can check for others in the same container with in the customer blob storage account 44 | var prefix = newCustomerFile.BatchPrefix; 45 | 46 | var instanceForPrefix = await starter.GetStatusAsync(prefix); 47 | if (instanceForPrefix == null) 48 | { 49 | log.LogTrace($@"New instance needed for prefix '{prefix}'. Starting..."); 50 | var retval = await starter.StartNewAsync(@"EnsureAllFiles", prefix, evt); 51 | log.LogTrace($@"Started. {retval}"); 52 | } 53 | else 54 | { 55 | log.LogTrace($@"Instance already waiting. Current status: {instanceForPrefix.RuntimeStatus}. Firing 'newfile' event..."); 56 | 57 | if (instanceForPrefix.RuntimeStatus != OrchestrationRuntimeStatus.Running 58 | && instanceForPrefix.RuntimeStatus != OrchestrationRuntimeStatus.Pending) 59 | { 60 | if (instanceForPrefix.RuntimeStatus != OrchestrationRuntimeStatus.Terminated) 61 | { 62 | await starter.TerminateAsync(prefix, @"bounce"); 63 | } 64 | 65 | var retval = await starter.StartNewAsync(@"EnsureAllFiles", prefix, evt); 66 | log.LogTrace($@"Restarted listener for {prefix}. 
{retval}"); 67 | } 68 | else 69 | { 70 | await starter.RaiseEventAsync(prefix, @"newfile", newCustomerFile.Filename); 71 | } 72 | } 73 | 74 | return starter.CreateCheckStatusResponse(req, prefix); 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /AzureFunctions.v3.Durable/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "logLevel": { 5 | "default": "Trace" 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /AzureFunctions.v3.DurableEntities/AzureFunctions.v3.DurableEntities.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | netcoreapp3.1 4 | v3 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | PreserveNewest 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | PreserveNewest 24 | 25 | 26 | PreserveNewest 27 | Never 28 | 29 | 30 | -------------------------------------------------------------------------------- /AzureFunctions.v3.DurableEntities/BatchEntity.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Linq; 3 | using System.Threading.Tasks; 4 | using Microsoft.Azure.WebJobs; 5 | using Microsoft.Azure.WebJobs.Extensions.DurableTask; 6 | using Microsoft.Extensions.Logging; 7 | using Newtonsoft.Json; 8 | 9 | namespace FileValidation 10 | { 11 | [JsonObject(MemberSerialization.OptIn)] 12 | public class BatchEntity : IBatchEntity 13 | { 14 | private readonly string _id; 15 | private readonly ILogger _logger; 16 | 17 | public BatchEntity(string id, ILogger logger) 18 | { 19 | _id = id; 20 | _logger = logger; 21 | } 22 | 23 | [JsonProperty] 24 | public List ReceivedFileTypes { get; set; } = new List(); 25 | 26 | [FunctionName(nameof(BatchEntity))] 27 | public static Task Run([EntityTrigger]IDurableEntityContext ctx, ILogger logger) => ctx.DispatchAsync(ctx.EntityKey, logger); 28 | 29 | public async Task NewFile(string fileUri) 30 | { 31 | var newCustomerFile = CustomerBlobAttributes.Parse(fileUri); 32 | _logger.LogInformation($@"Got new file via event: {newCustomerFile.Filename}"); 33 | this.ReceivedFileTypes.Add(newCustomerFile.Filetype); 34 | 35 | _logger.LogTrace($@"Actor '{_id}' got file '{newCustomerFile.Filetype}'"); 36 | 37 | var filesStillWaitingFor = Helpers.GetExpectedFilesForCustomer().Except(this.ReceivedFileTypes); 38 | if (filesStillWaitingFor.Any()) 39 | { 40 | _logger.LogInformation($@"Still waiting for more files... Still need {string.Join(", ", filesStillWaitingFor)} for customer {newCustomerFile.CustomerName}, batch {newCustomerFile.BatchPrefix}"); 41 | } 42 | else 43 | { 44 | _logger.LogInformation(@"Got all the files! 
Moving on..."); 45 | 46 | // call next step in functions with the prefix so it knows what to go grab 47 | await Helpers.DoValidationAsync($@"{newCustomerFile.ContainerName}/inbound/{newCustomerFile.BatchPrefix}", _logger); 48 | } 49 | } 50 | } 51 | 52 | public interface IBatchEntity 53 | { 54 | Task NewFile(string fileUri); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /AzureFunctions.v3.DurableEntities/Orchestrator.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Net; 3 | using System.Net.Http; 4 | using Azure.Messaging.EventGrid; 5 | using Azure.Messaging.EventGrid.SystemEvents; 6 | using Microsoft.Azure.WebJobs; 7 | using Microsoft.Azure.WebJobs.Extensions.DurableTask; 8 | using Microsoft.Azure.WebJobs.Extensions.Http; 9 | using Microsoft.Extensions.Logging; 10 | 11 | namespace FileValidation 12 | { 13 | public static class Orchestrator 14 | { 15 | [FunctionName("Orchestrator")] 16 | public static async System.Threading.Tasks.Task RunAsync([HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequestMessage req, [DurableClient] IDurableClient starter, ILogger log) 17 | { 18 | var reader = await req.Content.ReadAsStringAsync(); 19 | var evt = EventGridEvent.Parse(BinaryData.FromString(reader)); 20 | if (evt == null) 21 | { 22 | return req.CreateResponse(HttpStatusCode.BadRequest, @"Expecting only one item in the Event Grid message"); 23 | } 24 | 25 | if (evt.TryGetSystemEventData(out object eventData)) 26 | { 27 | if (eventData is SubscriptionValidationEventData subscriptionValidationEventData) 28 | { 29 | log.LogTrace(@"Event Grid Validation event received."); 30 | return req.CreateCompatibleResponse(HttpStatusCode.OK, $"{{ \"validationResponse\" : \"{subscriptionValidationEventData.ValidationCode}\" }}"); 31 | } 32 | } 33 | 34 | CustomerBlobAttributes newCustomerFile = Helpers.ParseEventGridPayload(evt, log); 35 | if (newCustomerFile == null) 36 | { // The request either wasn't valid (filename couldn't be parsed) or not applicable (put in to a folder other than /inbound) 37 | return req.CreateResponse(HttpStatusCode.NoContent); 38 | } 39 | 40 | string customerName = newCustomerFile.CustomerName, name = newCustomerFile.Filename, containerName = newCustomerFile.ContainerName; 41 | log.LogInformation($@"Processing new file. customer: {customerName}, filename: {name}"); 42 | 43 | // get the prefix for the name so we can check for others in the same container with in the customer blob storage account 44 | var prefix = newCustomerFile.BatchPrefix; 45 | await starter.SignalEntityAsync(prefix, b => b.NewFile(newCustomerFile.FullUrl)); 46 | 47 | return req.CreateResponse(HttpStatusCode.Accepted); 48 | 49 | } 50 | } 51 | } -------------------------------------------------------------------------------- /AzureFunctions.v3.DurableEntities/README.md: -------------------------------------------------------------------------------- 1 | # File validation using Durable Entities 2 | To learn more about Durable Entities, check out the documentation [here](https://docs.microsoft.com/en-us/azure/azure-functions/durable/durable-functions-entities). 3 | 4 | In this sample, you'll see how we can treat the "batch" which is being validated as a virtual actor using Durable Entities. It's then up to the entity itself to determine when all files are present, tracking state along the way. 
5 | 6 | As you'll see, it greatly simplifies the amount of Orchestration code we had to write in the other examples. -------------------------------------------------------------------------------- /AzureFunctions.v3.DurableEntities/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "logLevel": { 5 | "default": "Trace" 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /AzureFunctions.v3/AzureFunctions.v3.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | netcoreapp3.1 4 | v3 5 | Library 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | PreserveNewest 15 | 16 | 17 | PreserveNewest 18 | Never 19 | 20 | 21 | -------------------------------------------------------------------------------- /AzureFunctions.v3/CustomerBlobAttributes.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Globalization; 3 | using System.Text.RegularExpressions; 4 | 5 | namespace FileValidation 6 | { 7 | public class CustomerBlobAttributes 8 | { 9 | static readonly Regex blobUrlRegexExtract = new Regex(@"^\S*/([^/]+)/inbound/((([^_]+)_([\d]+_[\d]+))_([\w]+))\.csv$", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline); 10 | 11 | static readonly Regex blobUrlRegexExtractWithSubfolder = new Regex(@"^\S*/([^/]+)/([^/]+)/((([^_]+)_([\d]+_[\d]+))_([\w]+))\.csv$", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline); 12 | 13 | public string FullUrl { get; private set; } 14 | public string Filename { get; private set; } 15 | public string BatchPrefix { get; private set; } 16 | public DateTime BatchDateTime { get; private set; } 17 | public string Filetype { get; private set; } 18 | public string CustomerName { get; private set; } 19 | public string ContainerName { get; private set; } 20 | public string Subfolder { get; private set; } 21 | 22 | public static CustomerBlobAttributes Parse(string fullUri, bool detectSubfolder = false) 23 | { 24 | if (detectSubfolder) 25 | { 26 | var regexMatch = blobUrlRegexExtractWithSubfolder.Match(fullUri); 27 | if (regexMatch.Success) 28 | { 29 | return new CustomerBlobAttributes 30 | { 31 | FullUrl = regexMatch.Groups[0].Value, 32 | ContainerName = regexMatch.Groups[1].Value, 33 | Subfolder = regexMatch.Groups[2].Value, 34 | Filename = regexMatch.Groups[3].Value, 35 | BatchPrefix = regexMatch.Groups[4].Value, 36 | CustomerName = regexMatch.Groups[5].Value, 37 | BatchDateTime = DateTime.ParseExact(regexMatch.Groups[6].Value, @"yyyyMMdd_HHmm", CultureInfo.InvariantCulture), 38 | Filetype = regexMatch.Groups[7].Value 39 | }; 40 | } 41 | } 42 | else 43 | { 44 | var regexMatch = blobUrlRegexExtract.Match(fullUri); 45 | if (regexMatch.Success) 46 | { 47 | return new CustomerBlobAttributes 48 | { 49 | FullUrl = regexMatch.Groups[0].Value, 50 | ContainerName = regexMatch.Groups[1].Value, 51 | Filename = regexMatch.Groups[2].Value, 52 | BatchPrefix = regexMatch.Groups[3].Value, 53 | CustomerName = regexMatch.Groups[4].Value, 54 | BatchDateTime = DateTime.ParseExact(regexMatch.Groups[5].Value, @"yyyyMMdd_HHmm", CultureInfo.InvariantCulture), 55 | Filetype = regexMatch.Groups[6].Value 56 | }; 57 | } 58 | } 59 | 60 | return null; 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /AzureFunctions.v3/Extensions.cs: 
-------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Net; 3 | using System.Net.Http; 4 | using Microsoft.WindowsAzure.Storage.Blob; 5 | using Microsoft.WindowsAzure.Storage.Table; 6 | 7 | namespace FileValidation 8 | { 9 | static class StorageExtensions 10 | { 11 | public static async System.Threading.Tasks.Task> ExecuteQueryAsync(this CloudTable table, TableQuery query) where T : ITableEntity, new() 12 | { 13 | TableContinuationToken token = null; 14 | var retVal = new List(); 15 | do 16 | { 17 | var results = await table.ExecuteQuerySegmentedAsync(query, token); 18 | retVal.AddRange(results.Results); 19 | token = results.ContinuationToken; 20 | } while (token != null); 21 | 22 | return retVal; 23 | } 24 | 25 | 26 | public static async System.Threading.Tasks.Task> ListBlobsAsync(this CloudBlobClient blobClient, string prefix) 27 | { 28 | BlobContinuationToken token = null; 29 | var retVal = new List(); 30 | do 31 | { 32 | var results = await blobClient.ListBlobsSegmentedAsync(prefix, token); 33 | retVal.AddRange(results.Results); 34 | token = results.ContinuationToken; 35 | } while (token != null); 36 | 37 | return retVal; 38 | } 39 | 40 | } 41 | 42 | static class HttpExtensions 43 | { 44 | public static HttpResponseMessage CreateCompatibleResponse(this HttpRequestMessage _, HttpStatusCode code) => new HttpResponseMessage(code); 45 | 46 | public static HttpResponseMessage CreateCompatibleResponse(this HttpRequestMessage _, HttpStatusCode code, string stringContent) => new HttpResponseMessage(code) { Content = new StringContent(stringContent) }; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /AzureFunctions.v3/FunctionEnsureAllFiles.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Net; 6 | using System.Net.Http; 7 | using System.Threading.Tasks; 8 | using Azure.Messaging.EventGrid; 9 | using Azure.Messaging.EventGrid.SystemEvents; 10 | using Microsoft.Azure.WebJobs; 11 | using Microsoft.Azure.WebJobs.Extensions.Http; 12 | using Microsoft.Extensions.Logging; 13 | using Microsoft.WindowsAzure.Storage; 14 | using Microsoft.WindowsAzure.Storage.Table; 15 | 16 | namespace FileValidation 17 | { 18 | public static class FunctionEnsureAllFiles 19 | { 20 | [FunctionName("EnsureAllFiles")] 21 | public static async Task Run([HttpTrigger(AuthorizationLevel.Function, @"post")] HttpRequestMessage req, ILogger log) 22 | { 23 | var reader = await req.Content.ReadAsStringAsync(); 24 | var evt = EventGridEvent.Parse(BinaryData.FromString(reader)); 25 | if (evt == null) 26 | { 27 | return req.CreateErrorResponse(HttpStatusCode.BadRequest, $@"Expecting only one item in the Event Grid message"); 28 | } 29 | 30 | if (evt.TryGetSystemEventData(out object eventData)) 31 | { 32 | if (eventData is SubscriptionValidationEventData subscriptionValidationEventData) 33 | { 34 | log.LogTrace(@"Event Grid Validation event received."); 35 | return req.CreateCompatibleResponse(HttpStatusCode.OK, $"{{ \"validationResponse\" : \"{subscriptionValidationEventData.ValidationCode}\" }}"); 36 | } 37 | } 38 | 39 | var newCustomerFile = Helpers.ParseEventGridPayload(evt, log); 40 | if (newCustomerFile == null) 41 | { // The request either wasn't valid (filename couldn't be parsed) or not applicable (put in to a folder other than /inbound) 42 | 
return req.CreateCompatibleResponse(HttpStatusCode.NoContent); 43 | } 44 | 45 | // get the prefix for the name so we can check for others in the same container with in the customer blob storage account 46 | var prefix = newCustomerFile.BatchPrefix; 47 | 48 | if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out var blobStorage)) 49 | { 50 | throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); 51 | } 52 | 53 | var blobClient = blobStorage.CreateCloudBlobClient(); 54 | var matches = await blobClient.ListBlobsAsync(prefix: $@"{newCustomerFile.ContainerName}/inbound/{prefix}"); 55 | var matchNames = matches.Select(m => Path.GetFileNameWithoutExtension(blobClient.GetBlobReferenceFromServerAsync(m.StorageUri.PrimaryUri).GetAwaiter().GetResult().Name).Split('_').Last()).ToList(); 56 | 57 | var expectedFiles = Helpers.GetExpectedFilesForCustomer(); 58 | var filesStillWaitingFor = expectedFiles.Except(matchNames, new BlobFilenameVsDatabaseFileMaskComparer()); 59 | 60 | if (!filesStillWaitingFor.Any()) 61 | { 62 | // Verify that this prefix isn't already in the lock table for processings 63 | var lockTable = await Helpers.GetLockTableAsync(); 64 | var entriesMatchingPrefix = await LockTableEntity.GetLockRecordAsync(prefix, lockTable); 65 | if (entriesMatchingPrefix != null) 66 | { 67 | log.LogInformation($@"Skipping. We've already queued the batch with prefix '{prefix}' for processing"); 68 | return req.CreateCompatibleResponse(HttpStatusCode.NoContent); 69 | } 70 | 71 | log.LogInformation(@"Got all the files! Moving on..."); 72 | try 73 | { 74 | await lockTable.ExecuteAsync(TableOperation.Insert(new LockTableEntity(prefix))); 75 | } 76 | catch (StorageException) 77 | { 78 | log.LogInformation($@"Skipping. We've already queued the batch with prefix '{prefix}' for processing"); 79 | return req.CreateCompatibleResponse(HttpStatusCode.NoContent); 80 | } 81 | 82 | using (var c = new HttpClient()) 83 | { 84 | var jsonObjectForValidator = 85 | $@"{{ 86 | ""prefix"" : ""{newCustomerFile.ContainerName}/inbound/{prefix}"", 87 | ""fileTypes"" : [ 88 | {string.Join(", ", expectedFiles.Select(e => $@"""{e}"""))} 89 | ] 90 | }}"; 91 | // call next step in functions with the prefix so it knows what to go grab 92 | await c.PostAsync($@"{Environment.GetEnvironmentVariable(@"ValidateFunctionUrl")}", new StringContent(jsonObjectForValidator)); 93 | 94 | return req.CreateCompatibleResponse(HttpStatusCode.OK); 95 | } 96 | } 97 | else 98 | { 99 | log.LogInformation($@"Still waiting for more files... Have {matches.Count()} file(s) from this customer ({newCustomerFile.CustomerName}) for batch {newCustomerFile.BatchPrefix}. 
Still need {string.Join(", ", filesStillWaitingFor)}"); 100 | 101 | return req.CreateCompatibleResponse(HttpStatusCode.Accepted); 102 | } 103 | } 104 | 105 | class BlobFilenameVsDatabaseFileMaskComparer : IEqualityComparer 106 | { 107 | public bool Equals(string x, string y) => y.Contains(x); 108 | 109 | public int GetHashCode(string obj) => obj.GetHashCode(); 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /AzureFunctions.v3/FunctionValidateFileSet.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Net; 6 | using System.Net.Http; 7 | using System.Threading.Tasks; 8 | using Microsoft.Azure.WebJobs; 9 | using Microsoft.Azure.WebJobs.Extensions.Http; 10 | using Microsoft.Extensions.Logging; 11 | using Microsoft.WindowsAzure.Storage; 12 | using Microsoft.WindowsAzure.Storage.Blob; 13 | using Microsoft.WindowsAzure.Storage.Table; 14 | using Newtonsoft.Json.Linq; 15 | 16 | namespace FileValidation 17 | { 18 | public static class FunctionValidateFileSet 19 | { 20 | [FunctionName(@"ValidateFileSet")] 21 | public static async Task Run([HttpTrigger(AuthorizationLevel.Function, @"post", Route = @"Validate")] HttpRequestMessage req, ILogger log) 22 | { 23 | log.LogTrace(@"ValidateFileSet run."); 24 | if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out var storageAccount)) 25 | { 26 | throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); 27 | } 28 | 29 | var payload = JObject.Parse(await req.Content.ReadAsStringAsync()); 30 | 31 | var prefix = payload["prefix"].ToString(); // This is the entire path w/ prefix for the file set 32 | log.LogTrace($@"prefix: {prefix}"); 33 | 34 | var filePrefix = prefix.Substring(prefix.LastIndexOf('/') + 1); 35 | log.LogTrace($@"filePrefix: {filePrefix}"); 36 | 37 | var lockTable = await Helpers.GetLockTableAsync(); 38 | if (!await ShouldProceedAsync(lockTable, prefix, filePrefix, log)) 39 | { 40 | return req.CreateResponse(HttpStatusCode.OK); 41 | } 42 | 43 | var blobClient = storageAccount.CreateCloudBlobClient(); 44 | var targetBlobs = await blobClient.ListBlobsAsync(WebUtility.UrlDecode(prefix)); 45 | 46 | var customerName = filePrefix.Split('_').First().Split('-').Last(); 47 | 48 | var errors = new List(); 49 | var filesToProcess = payload["fileTypes"].Values(); 50 | 51 | foreach (var blobDetails in targetBlobs) 52 | { 53 | var blob = await blobClient.GetBlobReferenceFromServerAsync(blobDetails.StorageUri.PrimaryUri); 54 | 55 | var fileParts = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath); 56 | if (!filesToProcess.Contains(fileParts.Filetype, StringComparer.OrdinalIgnoreCase)) 57 | { 58 | log.LogTrace($@"{blob.Name} skipped. 
Isn't in the list of file types to process ({string.Join(", ", filesToProcess)}) for bottler '{customerName}'"); 59 | continue; 60 | } 61 | 62 | var lowerFileType = fileParts.Filetype.ToLowerInvariant(); 63 | log.LogInformation($@"Validating {lowerFileType}..."); 64 | 65 | uint numColumns = 0; 66 | switch (lowerFileType) 67 | { 68 | case @"type5": // salestype 69 | numColumns = 2; 70 | break; 71 | case @"type10": // mixedpack 72 | case @"type4": // shipfrom 73 | numColumns = 3; 74 | break; 75 | case @"type1": // channel 76 | case @"type2": // customer 77 | numColumns = 4; 78 | break; 79 | case @"type9": // itemdetail 80 | case @"type3": // shipto 81 | numColumns = 14; 82 | break; 83 | case @"type6": // salesdetail 84 | numColumns = 15; 85 | break; 86 | case @"type8": // product 87 | numColumns = 21; 88 | break; 89 | case @"type7": // sales 90 | numColumns = 23; 91 | break; 92 | default: 93 | throw new ArgumentOutOfRangeException(nameof(prefix), $@"Unhandled file type: {fileParts.Filetype}"); 94 | } 95 | 96 | errors.AddRange(await ValidateCsvStructureAsync(blob, numColumns, lowerFileType)); 97 | } 98 | try 99 | { 100 | await LockTableEntity.UpdateAsync(filePrefix, LockTableEntity.BatchState.Done, lockTable); 101 | } 102 | catch (StorageException) 103 | { 104 | log.LogWarning($@"That's weird. The lock for prefix {prefix} wasn't there. Shouldn't happen!"); 105 | return req.CreateResponse(HttpStatusCode.OK); 106 | } 107 | 108 | if (errors.Any()) 109 | { 110 | log.LogError($@"Errors found in batch {filePrefix}: {string.Join(@", ", errors)}"); 111 | 112 | // move files to 'invalid-set' folder 113 | await MoveBlobsAsync(log, blobClient, targetBlobs, @"invalid-set"); 114 | 115 | return req.CreateErrorResponse(HttpStatusCode.BadRequest, string.Join(@", ", errors)); 116 | } 117 | else 118 | { 119 | // move these files to 'valid-set' folder 120 | await MoveBlobsAsync(log, blobClient, targetBlobs, @"valid-set"); 121 | 122 | log.LogInformation($@"Set {filePrefix} successfully validated and queued for further processing."); 123 | 124 | return req.CreateResponse(HttpStatusCode.OK); 125 | } 126 | } 127 | 128 | private static async Task ShouldProceedAsync(CloudTable bottlerFilesTable, string prefix, string filePrefix, ILogger log) 129 | { 130 | try 131 | { 132 | var lockRecord = await LockTableEntity.GetLockRecordAsync(filePrefix, bottlerFilesTable); 133 | if (lockRecord?.State == LockTableEntity.BatchState.Waiting) 134 | { 135 | // Update the lock record to mark it as in progress 136 | lockRecord.State = LockTableEntity.BatchState.InProgress; 137 | await bottlerFilesTable.ExecuteAsync(TableOperation.Replace(lockRecord)); 138 | return true; 139 | } 140 | else 141 | { 142 | log.LogInformation($@"Validate for {prefix} skipped. State was {lockRecord?.State.ToString() ?? @"[null]"}."); 143 | } 144 | } 145 | catch (StorageException) 146 | { 147 | log.LogInformation($@"Validate for {prefix} skipped (StorageException. 
Somebody else picked it up already."); 148 | } 149 | 150 | return false; 151 | } 152 | 153 | private static async Task MoveBlobsAsync(ILogger log, CloudBlobClient blobClient, IEnumerable targetBlobs, string folderName) 154 | { 155 | foreach (var b in targetBlobs) 156 | { 157 | var blobRef = await blobClient.GetBlobReferenceFromServerAsync(b.StorageUri.PrimaryUri); 158 | var sourceBlob = b.Container.GetBlockBlobReference(blobRef.Name); 159 | 160 | var targetBlob = blobRef.Container 161 | .GetDirectoryReference($@"{folderName}") 162 | .GetBlockBlobReference(Path.GetFileName(blobRef.Name)); 163 | 164 | await targetBlob.StartCopyAsync(sourceBlob); 165 | 166 | while (targetBlob.CopyState.Status == CopyStatus.Pending) 167 | { 168 | ; // spinlock until the copy completes 169 | } 170 | 171 | var copySucceeded = targetBlob.CopyState.Status == CopyStatus.Success; 172 | if (!copySucceeded) 173 | { 174 | log.LogError($@"Error copying {sourceBlob.Name} to {folderName} folder. Retrying once..."); 175 | 176 | await targetBlob.StartCopyAsync(sourceBlob); 177 | 178 | while (targetBlob.CopyState.Status == CopyStatus.Pending) 179 | { 180 | ; // spinlock until the copy completes 181 | } 182 | 183 | copySucceeded = targetBlob.CopyState.Status == CopyStatus.Success; 184 | if (!copySucceeded) 185 | { 186 | log.LogError($@"Error retrying copy of {sourceBlob.Name} to {folderName} folder. File not moved."); 187 | } 188 | } 189 | 190 | if (copySucceeded) 191 | { 192 | #if DEBUG 193 | try 194 | { 195 | #endif 196 | await sourceBlob.DeleteAsync(); 197 | #if DEBUG 198 | } 199 | catch (StorageException ex) 200 | { 201 | log.LogError($@"Error deleting blob {sourceBlob.Name}", ex); 202 | } 203 | #endif 204 | } 205 | } 206 | } 207 | 208 | private static async Task> ValidateCsvStructureAsync(ICloudBlob blob, uint requiredNumberOfColumnsPerLine, string filetypeDescription) 209 | { 210 | var errs = new List(); 211 | try 212 | { 213 | using (var blobReader = new StreamReader(await blob.OpenReadAsync(new AccessCondition(), new BlobRequestOptions(), new OperationContext()))) 214 | { 215 | var fileAttributes = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath); 216 | 217 | for (var lineNumber = 0; !blobReader.EndOfStream; lineNumber++) 218 | { 219 | var errorPrefix = $@"{filetypeDescription} file '{fileAttributes.Filename}' Record {lineNumber}"; 220 | var line = blobReader.ReadLine(); 221 | var fields = line.Split(','); 222 | if (fields.Length != requiredNumberOfColumnsPerLine) 223 | { 224 | errs.Add($@"{errorPrefix} is malformed. 
Should have {requiredNumberOfColumnsPerLine} values; has {fields.Length}"); 225 | continue; 226 | } 227 | 228 | for (var i = 0; i < fields.Length; i++) 229 | { 230 | errorPrefix = $@"{errorPrefix} Field {i}"; 231 | var field = fields[i]; 232 | // each field must be enclosed in double quotes 233 | if (field[0] != '"' || field.Last() != '"') 234 | { 235 | errs.Add($@"{errorPrefix}: value ({field}) is not enclosed in double quotes ("")"); 236 | continue; 237 | } 238 | } 239 | } 240 | 241 | // Validate file is UTF-8 encoded 242 | if (!blobReader.CurrentEncoding.BodyName.Equals("utf-8", StringComparison.OrdinalIgnoreCase)) 243 | { 244 | errs.Add($@"{blob.Name} is not UTF-8 encoded"); 245 | } 246 | } 247 | } 248 | catch (StorageException storEx) 249 | { 250 | SwallowStorage404(storEx); 251 | } 252 | return errs; 253 | } 254 | 255 | private static void SwallowStorage404(StorageException storEx) 256 | { 257 | var webEx = storEx.InnerException as WebException; 258 | if ((webEx.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.NotFound) 259 | { 260 | // Ignore 261 | } 262 | else 263 | { 264 | throw storEx; 265 | } 266 | } 267 | } 268 | } 269 | -------------------------------------------------------------------------------- /AzureFunctions.v3/Helpers.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Net; 6 | using System.Threading.Tasks; 7 | using Azure.Messaging.EventGrid; 8 | using Microsoft.Extensions.Logging; 9 | using Microsoft.WindowsAzure.Storage; 10 | using Microsoft.WindowsAzure.Storage.Blob; 11 | using Microsoft.WindowsAzure.Storage.Table; 12 | 13 | namespace FileValidation 14 | { 15 | static class Helpers 16 | { 17 | public static async System.Threading.Tasks.Task GetLockTableAsync(CloudStorageAccount storageAccount = null) 18 | { 19 | CloudTable customerFilesTable; 20 | if (storageAccount == null) 21 | { 22 | if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"AzureWebJobsStorage"), out var sa)) 23 | { 24 | throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); 25 | } 26 | else 27 | { 28 | storageAccount = sa; 29 | } 30 | } 31 | 32 | try 33 | { 34 | customerFilesTable = storageAccount.CreateCloudTableClient().GetTableReference(@"FileProcessingLocks"); 35 | } 36 | catch (Exception ex) 37 | { 38 | throw new Exception($@"Error creating table client for locks: {ex}", ex); 39 | } 40 | 41 | while (true) 42 | { 43 | try 44 | { 45 | await customerFilesTable.CreateIfNotExistsAsync(); 46 | break; 47 | } 48 | catch { } 49 | } 50 | 51 | return customerFilesTable; 52 | } 53 | 54 | public static CustomerBlobAttributes ParseEventGridPayload(EventGridEvent eventGridItem, ILogger log) 55 | { 56 | if (eventGridItem.EventType == @"Microsoft.Storage.BlobCreated") 57 | { 58 | var egData = (dynamic)eventGridItem.Data; 59 | if (egData.api == @"PutBlob" 60 | && egData.url.ToString().EndsWith(".csv")) 61 | { 62 | try 63 | { 64 | var retVal = CustomerBlobAttributes.Parse((string)egData.url); 65 | if (retVal != null && !retVal.ContainerName.Equals(retVal.CustomerName)) 66 | { 67 | throw new ArgumentException($@"File '{retVal.Filename}' uploaded to container '{retVal.ContainerName}' doesn't have the right prefix: the first token in the filename ({retVal.CustomerName}) must be the customer name, which should match the container name", nameof(eventGridItem)); 68 | } 69 | 70 | return 
retVal; 71 | } 72 | catch (Exception ex) 73 | { 74 | log.LogError(@"Error parsing Event Grid payload", ex); 75 | } 76 | } 77 | } 78 | 79 | return null; 80 | } 81 | 82 | public static IEnumerable GetExpectedFilesForCustomer() => new[] { @"type1", @"type2", @"type3", @"type4", @"type5", @"type7", @"type8", @"type9", @"type10" }; 83 | 84 | public static async Task DoValidationAsync(string prefix, ILogger logger = null) 85 | { 86 | logger?.LogTrace(@"ValidateFileSet run."); 87 | if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out var storageAccount)) 88 | { 89 | throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); 90 | } 91 | 92 | logger?.LogTrace($@"prefix: {prefix}"); 93 | 94 | var filePrefix = prefix.Substring(prefix.LastIndexOf('/') + 1); 95 | logger?.LogTrace($@"filePrefix: {filePrefix}"); 96 | 97 | var blobClient = storageAccount.CreateCloudBlobClient(); 98 | var targetBlobs = await blobClient.ListBlobsAsync(WebUtility.UrlDecode(prefix)); 99 | var customerName = filePrefix.Split('_').First().Split('-').Last(); 100 | 101 | var errors = new List(); 102 | var expectedFiles = Helpers.GetExpectedFilesForCustomer(); 103 | 104 | foreach (var blobDetails in targetBlobs) 105 | { 106 | var blob = await blobClient.GetBlobReferenceFromServerAsync(blobDetails.StorageUri.PrimaryUri); 107 | 108 | var fileParts = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath); 109 | if (!expectedFiles.Contains(fileParts.Filetype, StringComparer.OrdinalIgnoreCase)) 110 | { 111 | logger?.LogTrace($@"{blob.Name} skipped. Isn't in the list of file types to process ({string.Join(", ", expectedFiles)}) for customer '{customerName}'"); 112 | continue; 113 | } 114 | 115 | var lowerFileType = fileParts.Filetype.ToLowerInvariant(); 116 | uint numColumns = 0; 117 | switch (lowerFileType) 118 | { 119 | case @"type5": // salestype 120 | numColumns = 2; 121 | break; 122 | case @"type10": // mixed 123 | case @"type4": // shipfrom 124 | numColumns = 3; 125 | break; 126 | case @"type1": // channel 127 | case @"type2": // customer 128 | numColumns = 4; 129 | break; 130 | case @"type9": // itemdetail 131 | numColumns = 5; 132 | break; 133 | case @"type3": // shipto 134 | numColumns = 14; 135 | break; 136 | case @"type6": // salesdetail 137 | numColumns = 15; 138 | break; 139 | case @"type8": // product 140 | numColumns = 21; 141 | break; 142 | case @"type7": // sales 143 | numColumns = 23; 144 | break; 145 | default: 146 | throw new ArgumentOutOfRangeException(nameof(prefix), $@"Unhandled file type: {fileParts.Filetype}"); 147 | } 148 | 149 | errors.AddRange(await ValidateCsvStructureAsync(blob, numColumns, lowerFileType)); 150 | } 151 | 152 | if (errors.Any()) 153 | { 154 | logger.LogError($@"Errors found in batch {filePrefix}: {string.Join(@", ", errors)}"); 155 | 156 | // move files to 'invalid-set' folder 157 | await Helpers.MoveBlobsAsync(blobClient, targetBlobs, @"invalid-set", logger); 158 | return false; 159 | } 160 | else 161 | { 162 | // move these files to 'valid-set' folder 163 | await Helpers.MoveBlobsAsync(blobClient, targetBlobs, @"valid-set", logger); 164 | 165 | logger.LogInformation($@"Set {filePrefix} successfully validated and queued for further processing."); 166 | return true; 167 | } 168 | } 169 | 170 | private static async Task> ValidateCsvStructureAsync(ICloudBlob blob, uint requiredNumberOfColumnsPerLine, string filetypeDescription) 171 | { 172 | var errs = new List(); 173 | try 174 | { 175 | using (var 
blobReader = new StreamReader(await blob.OpenReadAsync(new AccessCondition(), new BlobRequestOptions(), new OperationContext()))) 176 | { 177 | var fileAttributes = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath); 178 | 179 | for (var lineNumber = 0; !blobReader.EndOfStream; lineNumber++) 180 | { 181 | var errorPrefix = $@"{filetypeDescription} file '{fileAttributes.Filename}' Record {lineNumber}"; 182 | var line = blobReader.ReadLine(); 183 | var fields = line.Split(','); 184 | if (fields.Length != requiredNumberOfColumnsPerLine) 185 | { 186 | errs.Add($@"{errorPrefix} is malformed. Should have {requiredNumberOfColumnsPerLine} values; has {fields.Length}"); 187 | continue; 188 | } 189 | 190 | for (var i = 0; i < fields.Length; i++) 191 | { 192 | errorPrefix = $@"{errorPrefix} Field {i}"; 193 | var field = fields[i]; 194 | // each field must be enclosed in double quotes 195 | if (field[0] != '"' || field.Last() != '"') 196 | { 197 | errs.Add($@"{errorPrefix}: value ({field}) is not enclosed in double quotes ("")"); 198 | continue; 199 | } 200 | } 201 | } 202 | 203 | // Validate file is UTF-8 encoded 204 | if (!blobReader.CurrentEncoding.BodyName.Equals("utf-8", StringComparison.OrdinalIgnoreCase)) 205 | { 206 | errs.Add($@"{blob.Name} is not UTF-8 encoded"); 207 | } 208 | } 209 | } 210 | catch (StorageException storEx) 211 | { 212 | SwallowStorage404(storEx); 213 | } 214 | return errs; 215 | } 216 | 217 | private static void SwallowStorage404(StorageException storEx) 218 | { 219 | var webEx = storEx.InnerException as WebException; 220 | if ((webEx.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.NotFound) 221 | { 222 | // Ignore 223 | } 224 | else 225 | { 226 | throw storEx; 227 | } 228 | } 229 | 230 | public static async Task MoveBlobsAsync(CloudBlobClient blobClient, IEnumerable targetBlobs, string folderName, ILogger logger = null) 231 | { 232 | foreach (var b in targetBlobs) 233 | { 234 | var blobRef = await blobClient.GetBlobReferenceFromServerAsync(b.StorageUri.PrimaryUri); 235 | var sourceBlob = b.Container.GetBlockBlobReference(blobRef.Name); 236 | 237 | var targetBlob = blobRef.Container 238 | .GetDirectoryReference($@"{folderName}") 239 | .GetBlockBlobReference(Path.GetFileName(blobRef.Name)); 240 | 241 | string sourceLeaseGuid = Guid.NewGuid().ToString(), targetLeaseGuid = Guid.NewGuid().ToString(); 242 | var sourceLeaseId = await sourceBlob.AcquireLeaseAsync(TimeSpan.FromSeconds(60), sourceLeaseGuid); 243 | 244 | await targetBlob.StartCopyAsync(sourceBlob); 245 | 246 | while (targetBlob.CopyState.Status == CopyStatus.Pending) 247 | { 248 | ; // spinlock until the copy completes 249 | } 250 | 251 | var copySucceeded = targetBlob.CopyState.Status == CopyStatus.Success; 252 | if (!copySucceeded) 253 | { 254 | logger?.LogError($@"Error copying {sourceBlob.Name} to {folderName} folder. Retrying once..."); 255 | 256 | await targetBlob.StartCopyAsync(sourceBlob); 257 | 258 | while (targetBlob.CopyState.Status == CopyStatus.Pending) 259 | { 260 | ; // spinlock until the copy completes 261 | } 262 | 263 | copySucceeded = targetBlob.CopyState.Status == CopyStatus.Success; 264 | if (!copySucceeded) 265 | { 266 | logger?.LogError($@"Error retrying copy of {sourceBlob.Name} to {folderName} folder. 
File not moved."); 267 | } 268 | } 269 | 270 | if (copySucceeded) 271 | { 272 | #if DEBUG 273 | try 274 | { 275 | #endif 276 | await sourceBlob.ReleaseLeaseAsync(new AccessCondition { LeaseId = sourceLeaseId }); 277 | await sourceBlob.DeleteAsync(); 278 | #if DEBUG 279 | } 280 | catch (StorageException ex) 281 | { 282 | logger?.LogError($@"Error deleting blob {sourceBlob.Name}", ex); 283 | } 284 | #endif 285 | 286 | } 287 | } 288 | } 289 | } 290 | } -------------------------------------------------------------------------------- /AzureFunctions.v3/LockTableEntity.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Linq; 3 | using System.Threading.Tasks; 4 | using Microsoft.WindowsAzure.Storage; 5 | using Microsoft.WindowsAzure.Storage.Table; 6 | 7 | namespace FileValidation 8 | { 9 | class LockTableEntity : TableEntity 10 | { 11 | public LockTableEntity() : base() { } 12 | 13 | public LockTableEntity(string prefix) : base(prefix, prefix) { } 14 | 15 | [IgnoreProperty] 16 | public string Prefix 17 | { 18 | get => this.PartitionKey; 19 | set 20 | { 21 | this.PartitionKey = value; 22 | this.RowKey = value; 23 | } 24 | } 25 | 26 | [IgnoreProperty] 27 | public BatchState State { get; set; } = BatchState.Waiting; 28 | 29 | public string DbState 30 | { 31 | get => this.State.ToString(); 32 | set => this.State = (BatchState)Enum.Parse(typeof(BatchState), value); 33 | } 34 | 35 | public enum BatchState 36 | { 37 | Waiting, InProgress, Done 38 | } 39 | 40 | public static async Task GetLockRecordAsync(string filePrefix, CloudTable customerFilesTable = null, CloudStorageAccount customerFilesTableStorageAccount = null) 41 | { 42 | customerFilesTable = customerFilesTable ?? await Helpers.GetLockTableAsync(customerFilesTableStorageAccount); 43 | 44 | return (await customerFilesTable.ExecuteQueryAsync( 45 | new TableQuery() 46 | .Where(TableQuery.GenerateFilterCondition(@"PartitionKey", QueryComparisons.Equal, filePrefix)))) 47 | .SingleOrDefault(); 48 | } 49 | 50 | public static async Task UpdateAsync(string filePrefix, BatchState state, CloudTable customerFilesTable = null, CloudStorageAccount customerFilesTableStorageAccount = null) 51 | { 52 | var entity = await GetLockRecordAsync(filePrefix, customerFilesTable); 53 | entity.State = state; 54 | 55 | customerFilesTable = customerFilesTable ?? 
await Helpers.GetLockTableAsync(customerFilesTableStorageAccount); 56 | 57 | await customerFilesTable.ExecuteAsync(TableOperation.Replace(entity)); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /AzureFunctions.v3/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "logLevel": { 5 | "default": "Trace" 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /AzureFunctions.v3/sample.local.settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "IsEncrypted": false, 3 | "Values": { 4 | "AzureWebJobsStorage": "UseDevelopmentStorage=true", 5 | 6 | "CustomerBlobStorage": "", 7 | "ValidateFunctionUrl": "http://localhost:7071/api/Validate" 8 | } 9 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LogicApps/DeploymentHelper.cs: -------------------------------------------------------------------------------- 1 | // Requires the following Azure NuGet packages and related dependencies: 2 | // package id="Microsoft.Azure.Management.Authorization" version="2.0.0" 3 | // package id="Microsoft.Azure.Management.ResourceManager" version="1.4.0-preview" 4 | // package id="Microsoft.Rest.ClientRuntime.Azure.Authentication" version="2.2.8-preview" 5 | 6 | using System; 7 | using System.IO; 8 | using System.Threading.Tasks; using Microsoft.Rest.Azure.Authentication; // needed for Task and ApplicationTokenProvider 9 | using Microsoft.Azure.Management.ResourceManager; 10 | using Microsoft.Azure.Management.ResourceManager.Models; 11 | using Newtonsoft.Json; using Newtonsoft.Json.Linq; 12 | 13 | namespace PortalGenerated 14 | { 15 | /// <summary> 16 | /// This is a helper class for deploying an Azure Resource Manager template 17 | /// More info about template deployments can be found here https://go.microsoft.com/fwLink/?LinkID=733371 18 | /// </summary> 19 | class DeploymentHelper 20 | { 21 | string subscriptionId = "your-subscription-id"; 22 | string clientId = "your-service-principal-clientId"; 23 | string clientSecret = "your-service-principal-client-secret"; 24 | string resourceGroupName = "resource-group-name"; 25 | string deploymentName = "deployment-name"; 26 | string resourceGroupLocation = "resource-group-location"; // must be specified for creating a new resource group 27 | string pathToTemplateFile = "path-to-template.json-on-disk"; 28 | string pathToParameterFile = "path-to-parameters.json-on-disk"; 29 | string tenantId = "tenant-id"; 30 | 31 | public async Task Run() // Task (not async void) so callers can await completion and observe exceptions 32 | { 33 | // Try to obtain the service credentials 34 | var serviceCreds = await ApplicationTokenProvider.LoginSilentAsync(tenantId, clientId, clientSecret); 35 | 36 | // Read the template and parameter file contents 37 | JObject templateFileContents = GetJsonFileContents(pathToTemplateFile); 38 | JObject parameterFileContents = GetJsonFileContents(pathToParameterFile); 39 | 40 | // Create the resource manager client 41 | var resourceManagementClient = new ResourceManagementClient(serviceCreds); 42 | resourceManagementClient.SubscriptionId = subscriptionId; 43 | 44 | // Create or check that resource group exists 45 | EnsureResourceGroupExists(resourceManagementClient, resourceGroupName, resourceGroupLocation); 46 | 47 | // Start a deployment 48 | DeployTemplate(resourceManagementClient, resourceGroupName, deploymentName, templateFileContents, parameterFileContents); 49 | } 50 | 51 | /// <summary> 52 | /// Reads a JSON file from the specified path 53 | /// </summary> 54 | /// <param name="pathToJson">The full path to the JSON file</param> 55 | /// <returns>The JSON file contents</returns> 56 | private JObject GetJsonFileContents(string pathToJson) 57 | { 58 | JObject templatefileContent = new JObject(); 59 | using (StreamReader file = File.OpenText(pathToJson)) 60 | { 61 | using (JsonTextReader reader = new JsonTextReader(file)) 62 | { 63 | templatefileContent = (JObject)JToken.ReadFrom(reader); 64 | return templatefileContent; 65 | } 66 | } 67 | } 68 | 69 | /// <summary> 70 | /// Ensures that a resource group with the specified name exists. If it does not, will attempt to create one. 71 | /// </summary> 72 | /// <param name="resourceManagementClient">The resource manager client.</param> 73 | /// <param name="resourceGroupName">The name of the resource group.</param> 
74 | /// <param name="resourceGroupLocation">The resource group location. Required when creating a new resource group.</param> 75 | private static void EnsureResourceGroupExists(ResourceManagementClient resourceManagementClient, string resourceGroupName, string resourceGroupLocation) 76 | { 77 | if (resourceManagementClient.ResourceGroups.CheckExistence(resourceGroupName) != true) 78 | { 79 | Console.WriteLine(string.Format("Creating resource group '{0}' in location '{1}'", resourceGroupName, resourceGroupLocation)); 80 | var resourceGroup = new ResourceGroup(); 81 | resourceGroup.Location = resourceGroupLocation; 82 | resourceManagementClient.ResourceGroups.CreateOrUpdate(resourceGroupName, resourceGroup); 83 | } 84 | else 85 | { 86 | Console.WriteLine(string.Format("Using existing resource group '{0}'", resourceGroupName)); 87 | } 88 | } 89 | 90 | /// <summary> 91 | /// Starts a template deployment. 92 | /// </summary> 93 | /// <param name="resourceManagementClient">The resource manager client.</param> 94 | /// <param name="resourceGroupName">The name of the resource group.</param> 95 | /// <param name="deploymentName">The name of the deployment.</param> 96 | /// <param name="templateFileContents">The template file contents.</param> 97 | /// <param name="parameterFileContents">The parameter file contents.</param> 98 | private static void DeployTemplate(ResourceManagementClient resourceManagementClient, string resourceGroupName, string deploymentName, JObject templateFileContents, JObject parameterFileContents) 99 | { 100 | Console.WriteLine(string.Format("Starting template deployment '{0}' in resource group '{1}'", deploymentName, resourceGroupName)); 101 | var deployment = new Deployment(); 102 | 103 | deployment.Properties = new DeploymentProperties 104 | { 105 | Mode = DeploymentMode.Incremental, 106 | Template = templateFileContents, 107 | Parameters = parameterFileContents["parameters"].ToObject<JObject>() 108 | }; 109 | 110 | var deploymentResult = resourceManagementClient.Deployments.CreateOrUpdate(resourceGroupName, deploymentName, deployment); 111 | Console.WriteLine(string.Format("Deployment status: {0}", deploymentResult.Properties.ProvisioningState)); 112 | } 113 | } 114 | } -------------------------------------------------------------------------------- /LogicApps/deploy.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .SYNOPSIS 3 | Deploys a template to Azure 4 | 5 | .DESCRIPTION 6 | Deploys an Azure Resource Manager template 7 | 8 | .PARAMETER subscriptionId 9 | The subscription id where the template will be deployed. 10 | 11 | .PARAMETER resourceGroupName 12 | The resource group where the template will be deployed. Can be the name of an existing or a new resource group. 13 | 14 | .PARAMETER resourceGroupLocation 15 | Optional, a resource group location. If specified, will try to create a new resource group in this location. If not specified, assumes resource group is existing. 16 | 17 | .PARAMETER deploymentName 18 | The deployment name. 19 | 20 | .PARAMETER templateFilePath 21 | Optional, path to the template file. Defaults to template.json. 22 | 23 | .PARAMETER parametersFilePath 24 | Optional, path to the parameters file. Defaults to parameters.json. If file is not found, will prompt for parameter values based on template. 
25 | #> 26 | 27 | param( 28 | [Parameter(Mandatory=$True)] 29 | [string] 30 | $subscriptionId, 31 | 32 | [Parameter(Mandatory=$True)] 33 | [string] 34 | $resourceGroupName, 35 | 36 | [string] 37 | $resourceGroupLocation, 38 | 39 | [Parameter(Mandatory=$True)] 40 | [string] 41 | $deploymentName, 42 | 43 | [string] 44 | $templateFilePath = "template.json", 45 | 46 | [string] 47 | $parametersFilePath = "parameters.json" 48 | ) 49 | 50 | <# 51 | .SYNOPSIS 52 | Registers RPs 53 | #> 54 | Function RegisterRP { 55 | Param( 56 | [string]$ResourceProviderNamespace 57 | ) 58 | 59 | Write-Host "Registering resource provider '$ResourceProviderNamespace'"; 60 | Register-AzureRmResourceProvider -ProviderNamespace $ResourceProviderNamespace; 61 | } 62 | 63 | #****************************************************************************** 64 | # Script body 65 | # Execution begins here 66 | #****************************************************************************** 67 | $ErrorActionPreference = "Stop" 68 | 69 | # sign in 70 | Write-Host "Logging in..."; 71 | Login-AzureRmAccount; 72 | 73 | # select subscription 74 | Write-Host "Selecting subscription '$subscriptionId'"; 75 | Select-AzureRmSubscription -SubscriptionID $subscriptionId; 76 | 77 | # Register RPs 78 | $resourceProviders = @("microsoft.logic"); 79 | if($resourceProviders.length) { 80 | Write-Host "Registering resource providers" 81 | foreach($resourceProvider in $resourceProviders) { 82 | RegisterRP($resourceProvider); 83 | } 84 | } 85 | 86 | #Create or check for existing resource group 87 | $resourceGroup = Get-AzureRmResourceGroup -Name $resourceGroupName -ErrorAction SilentlyContinue 88 | if(!$resourceGroup) 89 | { 90 | Write-Host "Resource group '$resourceGroupName' does not exist. To create a new resource group, please enter a location."; 91 | if(!$resourceGroupLocation) { 92 | $resourceGroupLocation = Read-Host "resourceGroupLocation"; 93 | } 94 | Write-Host "Creating resource group '$resourceGroupName' in location '$resourceGroupLocation'"; 95 | New-AzureRmResourceGroup -Name $resourceGroupName -Location $resourceGroupLocation 96 | } 97 | else{ 98 | Write-Host "Using existing resource group '$resourceGroupName'"; 99 | } 100 | 101 | # Start the deployment 102 | Write-Host "Starting deployment..."; 103 | if(Test-Path $parametersFilePath) { 104 | New-AzureRmResourceGroupDeployment -ResourceGroupName $resourceGroupName -TemplateFile $templateFilePath -TemplateParameterFile $parametersFilePath; 105 | } else { 106 | New-AzureRmResourceGroupDeployment -ResourceGroupName $resourceGroupName -TemplateFile $templateFilePath; 107 | } -------------------------------------------------------------------------------- /LogicApps/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | 5 | # -e: immediately exit if any command has a non-zero exit status 6 | # -o: prevents errors in a pipeline from being masked 7 | # IFS new value is less likely to cause confusing bugs when looping arrays or arguments (e.g. 
$@) 8 | 9 | usage() { echo "Usage: $0 -i <subscriptionId> -g <resourceGroupName> -n <deploymentName> -l <resourceGroupLocation>" 1>&2; exit 1; } 10 | 11 | declare subscriptionId="" 12 | declare resourceGroupName="" 13 | declare deploymentName="" 14 | declare resourceGroupLocation="" 15 | 16 | # Initialize parameters specified from command line 17 | while getopts ":i:g:n:l:" arg; do 18 | case "${arg}" in 19 | i) 20 | subscriptionId=${OPTARG} 21 | ;; 22 | g) 23 | resourceGroupName=${OPTARG} 24 | ;; 25 | n) 26 | deploymentName=${OPTARG} 27 | ;; 28 | l) 29 | resourceGroupLocation=${OPTARG} 30 | ;; 31 | esac 32 | done 33 | shift $((OPTIND-1)) 34 | 35 | #Prompt for parameters if some required parameters are missing 36 | if [[ -z "$subscriptionId" ]]; then 37 | echo "Your subscription ID can be looked up with the CLI using: az account show --out json " 38 | echo "Enter your subscription ID:" 39 | read subscriptionId 40 | [[ "${subscriptionId:?}" ]] 41 | fi 42 | 43 | if [[ -z "$resourceGroupName" ]]; then 44 | echo "This script will look for an existing resource group, otherwise a new one will be created " 45 | echo "You can create new resource groups with the CLI using: az group create " 46 | echo "Enter a resource group name" 47 | read resourceGroupName 48 | [[ "${resourceGroupName:?}" ]] 49 | fi 50 | 51 | if [[ -z "$deploymentName" ]]; then 52 | echo "Enter a name for this deployment:" 53 | read deploymentName 54 | fi 55 | 56 | if [[ -z "$resourceGroupLocation" ]]; then 57 | echo "If creating a *new* resource group, you need to set a location " 58 | echo "You can lookup locations with the CLI using: az account list-locations " 59 | 60 | echo "Enter resource group location:" 61 | read resourceGroupLocation 62 | fi 63 | 64 | #templateFile Path - template file to be used 65 | templateFilePath="template.json" 66 | 67 | if [ ! -f "$templateFilePath" ]; then 68 | echo "$templateFilePath not found" 69 | exit 1 70 | fi 71 | 72 | #parameter file path 73 | parametersFilePath="parameters.json" 74 | 75 | if [ ! -f "$parametersFilePath" ]; then 76 | echo "$parametersFilePath not found" 77 | exit 1 78 | fi 79 | 80 | if [ -z "$subscriptionId" ] || [ -z "$resourceGroupName" ] || [ -z "$deploymentName" ]; then 81 | echo "Either one of subscriptionId, resourceGroupName, deploymentName is empty" 82 | usage 83 | fi 84 | 85 | #login to azure using your credentials 86 | # guard the command directly: under 'set -e' a failing command would abort before a $? check could run 87 | 88 | if ! az account show 1> /dev/null; 89 | then 90 | az login 91 | fi 92 | 93 | #set the default subscription id 94 | az account set --subscription $subscriptionId 95 | 96 | set +e 97 | 98 | #Check for existing RG 99 | az group show --name $resourceGroupName 1> /dev/null 100 | 101 | if [ $? != 0 ]; then 102 | echo "Resource group with name" $resourceGroupName "could not be found. Creating new resource group.." 103 | set -e 104 | ( 105 | set -x 106 | az group create --name $resourceGroupName --location $resourceGroupLocation 1> /dev/null 107 | ) 108 | else 109 | echo "Using existing resource group..." 110 | fi 111 | 112 | #Start deployment 113 | echo "Starting deployment..." 114 | ( 115 | set -x 116 | az group deployment create --name "$deploymentName" --resource-group "$resourceGroupName" --template-file "$templateFilePath" --parameters "@${parametersFilePath}" 117 | ) 118 | 119 | if [ $? 
== 0 ]; 120 | then 121 | echo "Template has been successfully deployed" 122 | fi 123 | -------------------------------------------------------------------------------- /LogicApps/deployer.rb: -------------------------------------------------------------------------------- 1 | require 'azure_mgmt_resources' 2 | 3 | class Deployer 4 | 5 | # Initialize the deployer class with subscription, resource group and resource group location. The class will raise an 6 | # ArgumentError if there are empty values for Tenant Id, Client Id or Client Secret environment variables. 7 | # 8 | # @param [String] subscription_id the subscription to deploy the template 9 | # @param [String] resource_group the resource group to create or update and then deploy the template 10 | # @param [String] resource_group_location the location of the resource group 11 | def initialize(subscription_id, resource_group, resource_group_location) 12 | raise ArgumentError.new("Missing template file 'template.json' in current directory.") unless File.exist?('template.json') 13 | raise ArgumentError.new("Missing parameters file 'parameters.json' in current directory.") unless File.exist?('parameters.json') 14 | @resource_group = resource_group 15 | @subscription_id = subscription_id 16 | @resource_group_location = resource_group_location 17 | provider = MsRestAzure::ApplicationTokenProvider.new( 18 | ENV['AZURE_TENANT_ID'], 19 | ENV['AZURE_CLIENT_ID'], 20 | ENV['AZURE_CLIENT_SECRET']) 21 | credentials = MsRest::TokenCredentials.new(provider) 22 | @client = Azure::ARM::Resources::ResourceManagementClient.new(credentials) 23 | @client.subscription_id = @subscription_id 24 | end 25 | 26 | # Deploy the template to a resource group 27 | def deploy 28 | # ensure the resource group is created 29 | params = Azure::ARM::Resources::Models::ResourceGroup.new.tap do |rg| 30 | rg.location = @resource_group_location 31 | end 32 | @client.resource_groups.create_or_update(@resource_group, params).value! 33 | 34 | # build the deployment from a json file template from parameters 35 | template = File.read(File.expand_path(File.join(__dir__, 'template.json'))) 36 | deployment = Azure::ARM::Resources::Models::Deployment.new 37 | deployment.properties = Azure::ARM::Resources::Models::DeploymentProperties.new 38 | deployment.properties.template = JSON.parse(template) 39 | deployment.properties.mode = Azure::ARM::Resources::Models::DeploymentMode::Incremental 40 | 41 | # build the deployment template parameters from Hash to {key: {value: value}} format 42 | deploy_params = File.read(File.expand_path(File.join(__dir__, 'parameters.json'))) 43 | deployment.properties.parameters = JSON.parse(deploy_params)["parameters"] 44 | 45 | # put the deployment to the resource group 46 | @client.deployments.create_or_update(@resource_group, 'azure-sample', deployment) 47 | end 48 | end 49 | 50 | # Get user inputs and execute the script 51 | if(ARGV.empty?) 
52 | puts "Please specify subscriptionId resourceGroupName resourceGroupLocation as command line arguments" 53 | exit 54 | end 55 | 56 | subscription_id = ARGV[0] # Azure Subscription Id 57 | resource_group = ARGV[1] # The resource group for deployment 58 | resource_group_location = ARGV[2] # The resource group location 59 | 60 | msg = "\nInitializing the Deployer class with subscription id: #{subscription_id}, resource group: #{resource_group}" 61 | msg += "\nand resource group location: #{resource_group_location}...\n\n" 62 | puts msg 63 | 64 | # Initialize the deployer class 65 | deployer = Deployer.new(subscription_id, resource_group, resource_group_location) 66 | 67 | puts "Beginning the deployment... \n\n" 68 | # Deploy the template 69 | deployment = deployer.deploy 70 | 71 | puts "Done deploying!!" -------------------------------------------------------------------------------- /LogicApps/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workflows_FileValidation_BatchReceiver_name": { 6 | "value": null 7 | }, 8 | "workflows_FileValidation_BatchProcessor_name": { 9 | "value": null 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /LogicApps/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workflows_FileValidation_BatchReceiver_name": { 6 | "defaultValue": "FileValidation-BatchReceiver", 7 | "type": "String" 8 | }, 9 | "workflows_FileValidation_BatchProcessor_name": { 10 | "defaultValue": "FileValidation-BatchProcessor", 11 | "type": "String" 12 | } 13 | }, 14 | "variables": {}, 15 | "resources": [ 16 | { 17 | "comments": "Generalized from resource: '/subscriptions/0c249eea-065b-4034-955e-795d56b1e5d1/resourceGroups/serverless-demo-test/providers/Microsoft.Logic/workflows/FileValidation-BatchProcessor'.", 18 | "type": "Microsoft.Logic/workflows", 19 | "name": "[parameters('workflows_FileValidation_BatchProcessor_name')]", 20 | "apiVersion": "2017-07-01", 21 | "location": "westus2", 22 | "tags": {}, 23 | "scale": null, 24 | "properties": { 25 | "state": "Enabled", 26 | "definition": { 27 | "$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#", 28 | "contentVersion": "1.0.0.0", 29 | "parameters": {}, 30 | "triggers": { 31 | "manual": { 32 | "type": "Request", 33 | "kind": "EventGrid", 34 | "inputs": { 35 | "schema": { 36 | "properties": { 37 | "data": {} 38 | }, 39 | "type": "object" 40 | } 41 | } 42 | } 43 | }, 44 | "actions": { 45 | "BatchReceiver_2": { 46 | "runAfter": { 47 | "Partition": [ 48 | "Succeeded" 49 | ] 50 | }, 51 | "type": "SendToBatch", 52 | "inputs": { 53 | "batchName": "BatchReciever", 54 | "content": "@outputs('Compose')", 55 | "host": { 56 | "triggerName": "Batch_messages", 57 | "workflow": { 58 | "id": "[resourceId('Microsoft.Logic/workflows', parameters('workflows_FileValidation_BatchReceiver_name'))]" 59 | } 60 | }, 61 | "partitionName": "@{outputs('Partition')}" 62 | } 63 | }, 64 | "Compose": { 65 | "runAfter": { 66 | "Parse_JSON": [ 67 | "Succeeded" 68 | ] 69 | }, 70 | "type": "Compose", 71 | "inputs": 
"@body('Parse_JSON')[0]?['data']['url']" 72 | }, 73 | "Parse_JSON": { 74 | "runAfter": {}, 75 | "type": "ParseJson", 76 | "inputs": { 77 | "content": "@triggerBody()", 78 | "schema": { 79 | "items": { 80 | "properties": { 81 | "data": { 82 | "properties": { 83 | "api": { 84 | "type": "string" 85 | }, 86 | "blobType": { 87 | "type": "string" 88 | }, 89 | "clientRequestId": { 90 | "type": "string" 91 | }, 92 | "contentLength": { 93 | "type": "number" 94 | }, 95 | "contentType": { 96 | "type": "string" 97 | }, 98 | "eTag": { 99 | "type": "string" 100 | }, 101 | "requestId": { 102 | "type": "string" 103 | }, 104 | "sequencer": { 105 | "type": "string" 106 | }, 107 | "storageDiagnostics": { 108 | "properties": { 109 | "batchId": { 110 | "type": "string" 111 | } 112 | }, 113 | "type": "object" 114 | }, 115 | "url": { 116 | "type": "string" 117 | } 118 | }, 119 | "type": "object" 120 | }, 121 | "dataVersion": { 122 | "type": "string" 123 | }, 124 | "eventTime": { 125 | "type": "string" 126 | }, 127 | "eventType": { 128 | "type": "string" 129 | }, 130 | "id": { 131 | "type": "string" 132 | }, 133 | "metadataVersion": { 134 | "type": "string" 135 | }, 136 | "subject": { 137 | "type": "string" 138 | }, 139 | "topic": { 140 | "type": "string" 141 | } 142 | }, 143 | "required": [ 144 | "topic", 145 | "subject", 146 | "eventType", 147 | "eventTime", 148 | "id", 149 | "data", 150 | "dataVersion", 151 | "metadataVersion" 152 | ], 153 | "type": "object" 154 | }, 155 | "type": "array" 156 | } 157 | } 158 | }, 159 | "Partition": { 160 | "runAfter": { 161 | "Compose": [ 162 | "Succeeded" 163 | ] 164 | }, 165 | "type": "Compose", 166 | "inputs": "@substring(outputs('Compose'), 50, 14)" 167 | } 168 | }, 169 | "outputs": {} 170 | }, 171 | "parameters": {} 172 | }, 173 | "dependsOn": [ 174 | "[resourceId('Microsoft.Logic/workflows', parameters('workflows_FileValidation_BatchReceiver_name'))]" 175 | ] 176 | }, 177 | { 178 | "comments": "Generalized from resource: '/subscriptions/0c249eea-065b-4034-955e-795d56b1e5d1/resourceGroups/serverless-demo-test/providers/Microsoft.Logic/workflows/FileValidation-BatchReceiver'.", 179 | "type": "Microsoft.Logic/workflows", 180 | "name": "[parameters('workflows_FileValidation_BatchReceiver_name')]", 181 | "apiVersion": "2017-07-01", 182 | "location": "westus2", 183 | "tags": {}, 184 | "scale": null, 185 | "properties": { 186 | "state": "Enabled", 187 | "definition": { 188 | "$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#", 189 | "contentVersion": "1.0.0.0", 190 | "parameters": {}, 191 | "triggers": { 192 | "Batch_messages": { 193 | "type": "Batch", 194 | "inputs": { 195 | "configurations": { 196 | "BatchReciever": { 197 | "releaseCriteria": { 198 | "messageCount": 3 199 | } 200 | } 201 | }, 202 | "mode": "Inline" 203 | } 204 | } 205 | }, 206 | "actions": { 207 | "For_each": { 208 | "foreach": "@triggerBody()['items']", 209 | "actions": { 210 | "Change_to_run_validation_on_each_item's_content": { 211 | "runAfter": {}, 212 | "type": "Compose", 213 | "inputs": "@items('For_each')['content']" 214 | } 215 | }, 216 | "runAfter": {}, 217 | "type": "Foreach" 218 | } 219 | }, 220 | "outputs": {} 221 | }, 222 | "parameters": {} 223 | }, 224 | "dependsOn": [] 225 | } 226 | ] 227 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | page_type: sample 3 | languages: 4 | - csharp 5 | - 
python 6 | products: 7 | - azure 8 | - azure-blob-storage 9 | - azure-event-grid 10 | - azure-functions 11 | - azure-logic-apps 12 | - azure-storage 13 | - azure-table-storage 14 | - dotnet 15 | description: This sample outlines ways to accomplish validation across files received in a batch format using Azure Serverless technologies. 16 | --- 17 | 18 | # File processing and validation using Azure Functions, Logic Apps, and Durable Functions 19 | 20 | This sample outlines multiple ways to accomplish the following set of requirements using Azure Serverless technologies. One way uses the "traditional" serverless approach, another Logic Apps, and another Azure Functions' _Durable Functions_ feature. 21 | 22 | ## Problem statement 23 | 24 | Given a set of customers, assume each customer uploads data to our backend for historical record keeping and analysis. This data arrives in the form of a **set** of `.csv` files with each file containing different data. Think of them almost as SQL Table dumps in CSV format. 25 | 26 | When the customer uploads the files, we have two primary objectives: 27 | 28 | 1. Ensure that all the files required for the customer are present for a particular "set" (aka "batch") of data 29 | 2. Only when we have all the files for a set, continue on to validate the structure of each file ensuring a handful of requirements: 30 | * Each file must be UTF-8 encoded 31 | * Depending on the file (type1, type2, etc), ensure the correct number of columns is present in the CSV file 32 | 33 | ## Setup 34 | 35 | To run this sample, you'll need to set up a few things: 36 | 37 | 1. Azure General Purpose Storage 38 | * For the Functions SDK to store its dashboard info, and the Durable Functions to store their state data 39 | 1. Azure Blob Storage 40 | * For the customer files to be uploaded into 41 | 1. Azure Event Grid (with Storage Events) 42 | 1. ngrok to enable local Azure Function triggering from Event Grid (see this blog post for more) 43 | 1. [Visual Studio 2019](https://visualstudio.microsoft.com/downloads/) 44 | 1. Azure Storage Explorer (makes testing easier) 45 | 46 | For the Python version of this sample (folder `AzureFunctions.Python`), follow the instructions in its dedicated [readme](/AzureFunctions.Python/README.md). 47 | 48 | ## Execution 49 | 50 | Pull down the code. 51 | 52 | Copy `sample.local.settings.json` in the `AzureFunctions.v3` **project** to a new file called `local.settings.json`. 53 | 54 | This file will be used across the functions, durable or otherwise. 55 | 56 | Next, run any of the Function apps in this solution. You can use either the v1 (.NET Framework) or the v3 (.NET Core) version; at this point a running function is only needed so Event Grid can validate the subscription you're about to create. 57 | With the function running, add an Event Grid Subscription to the Blob Storage account (from step 2), pointing to the ngrok-piped endpoint you created in step 4. The URL should look something like this: 58 | 59 | * Normal Functions: `https://b3252cc3.ngrok.io/api/EnsureAllFiles` 60 | * Durable Functions: `https://b3252cc3.ngrok.io/api/Orchestrator` 61 | 62 | ![An Event Grid subscription set up to target an ngrok endpoint](images/ngroksubscription.png) 63 | 64 | Upon saving this subscription, you'll see your locally-running Function get hit with a validation request and return HTTP OK (sketched below); the Subscription will then go green in Azure and you're set. 65 | 
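To make that handshake concrete, here is a minimal sketch (not the repo's code; the function and variable names are illustrative) of how an HTTP-triggered v3 function can answer Event Grid's `SubscriptionValidationEvent` by echoing the validation code back:

```csharp
using System.IO;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Newtonsoft.Json.Linq;

public static class HandshakeSketch
{
    [FunctionName(@"HandshakeSketch")] // hypothetical name; the real endpoints are EnsureAllFiles / Orchestrator
    public static async Task<IActionResult> Run(
        [HttpTrigger(AuthorizationLevel.Anonymous, @"post")] HttpRequest req)
    {
        var events = JArray.Parse(await new StreamReader(req.Body).ReadToEndAsync());
        var first = (JObject)events.First;

        // Event Grid's one-time subscription handshake: echo data.validationCode back with HTTP 200.
        if (first[@"eventType"]?.Value<string>() == @"Microsoft.EventGrid.SubscriptionValidationEvent")
        {
            return new OkObjectResult(new { validationResponse = first[@"data"][@"validationCode"].Value<string>() });
        }

        // Anything else is a batch of blob-created events to hand to the real processing logic.
        return new OkResult();
    }
}
```

Functions wired up through the Event Grid trigger binding get this handshake for free; HTTP-triggered endpoints like the ones in this sample respond to it as part of their normal request handling.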
66 | Now, open Azure Storage Explorer and connect to the *Blob* Storage Account you've created. In here, create a container named `cust1`. Inside the container, create a new folder called `inbound`. 67 | 68 | Take one of the `.csv` files from the `sampledata` folder of this repo, and drop it into the `inbound` folder. 69 | 70 | You'll see the endpoint you defined as your Event Grid webhook subscription get hit. 71 | 72 | ### Durable Function Execution 73 | 74 | 1. Determine the "batch prefix" of the file that was dropped. This consists of the customer name (cust1) and a datetime stamp in the format YYYYMMDD_HHMM, so the batch prefix for the first batch in `sampledata` is `cust1_20171010_1112` 75 | 1. Check to see if a sub-orchestration for this batch already exists. 76 | 1. If not, spin one up and pass along the Event Grid data that triggered this execution 77 | 1. If so, use `RaiseEvent` to pass the filename along to the instance. 78 | 79 | In the `EnsureAllFiles` sub-orchestration, we look up what files we need for this customer (cust1) and check to see which files have come through thus far. As long as we do *not* have all the files we need, we loop within the orchestration, each time waiting for an external `newfile` event that tells us a new file has arrived and should be processed. 80 | 81 | When we find we have all the files that constitute a "batch" for the customer, we call the `ValidateFileSet` activity function to process each file in the set and validate its structure according to our rules. 82 | 83 | When validation completes successfully, all files from the batch are moved to a `valid-set` subfolder in the blob storage container. If validation fails (try removing a column in one of the lines in one of the files), the whole set gets moved to `invalid-set`. 84 | 85 | #### Resetting Durable Execution 86 | 87 | Because of the persistent behavior of state for Durable Functions, if you need to reset the execution because something goes wrong it's not as simple as just re-running the function. To do this properly, you must: 88 | 89 | * **Delete the `DurableFunctionsHubHistory` Table** in the "General Purpose" Storage Account you created in Step 1 above. 90 | * Delete any files you uploaded to the `/inbound` directory of the blob storage container triggering the Functions. 91 | 92 | **Note**: after doing these steps you'll have to wait a minute or so before running either of the Durable Function implementations as the storage table creation will error with 409 CONFLICT while deletion takes place. 93 | 94 | ### "Classic" Function execution 95 | 96 | 1. Determine the "batch prefix" of the file that was dropped. This consists of the customer name (cust1) and a datetime stamp in the format YYYYMMDD_HHMM, so the batch prefix for the first batch in `sampledata` is `cust1_20171010_1112` 97 | 1. Check to see if we have all necessary files in blob storage with this prefix. 98 | 1. If we do, check to see if there's a lock entry in the `FileProcessingLocks` table of the General Purpose Storage Account containing this prefix. If so, bail. If not, create one (an atomic insert; see the sketch below), then call the `ValidateFunctionUrl` endpoint with the batch prefix as payload. 99 | 1. The Validate function gets the request and checks to see if the lock is marked as 'in progress'. If so, bail. If not, mark it as such and continue validating the files in the Blob Storage account which match the prefix passed in. 100 | 101 | When validation completes successfully, all files from the batch are moved to a `valid-set` subfolder in the blob storage container. If validation fails (try removing a column in one of the lines in one of the files), the whole set gets moved to `invalid-set`. 
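To make step 3 concrete, here is a rough sketch of claiming the lock atomically. It leans on the `LockTableEntity` class from the `AzureFunctions.v3` project, but the wrapper method and parameter names are illustrative, not the repo's exact code:

```csharp
using System.Net;
using System.Threading.Tasks;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Table;

namespace FileValidation
{
    static class LockSketch
    {
        // Returns true if this invocation now owns the batch.
        public static async Task<bool> TryClaimBatchAsync(CloudTable lockTable, string batchPrefix)
        {
            try
            {
                // Insert (not InsertOrReplace) acts as an atomic "create if absent": a concurrent
                // caller inserting the same PartitionKey/RowKey receives 409 Conflict instead.
                await lockTable.ExecuteAsync(TableOperation.Insert(new LockTableEntity(batchPrefix)));
                return true;
            }
            catch (StorageException ex) when (ex.RequestInformation?.HttpStatusCode == (int)HttpStatusCode.Conflict)
            {
                return false; // another invocation beat us to this prefix; bail
            }
        }
    }
}
```

The `BatchState` tracked on the entity (`Waiting`/`InProgress`/`Done`) is what lets the Validate function bail out when another run has already marked the batch as in progress.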
102 | 103 | #### Resetting Classic Execution 104 | 105 | * Delete the `FileProcessingLocks` table from the General Purpose Storage Account. 106 | * Delete any files you uploaded to the `/inbound` directory of the blob storage container triggering the Functions. 107 | 108 | **Note**: after doing these steps you'll have to wait a minute or so before re-running the Functions, as the storage table creation will error with 409 CONFLICT while deletion takes place. 109 | 110 | ### Logic Apps 111 | 112 | While they don't behave identically, this repo also contains deployment scripts for two Logic App instances that perform roughly the same flow. 113 | 114 | #### Batch Processor 115 | 116 | This Logic App receives Storage Events from Event Grid, pulls off the full prefix of the file (along with its URL), and sends this on to... 117 | 118 | #### Batch Receiver 119 | 120 | This receives events from the Processor and waits for three with the same prefix to arrive before sending the batch on to the next step (you can change this to whatever you want after deployment). 121 | 122 | ## Known issues 123 | 124 | ### Durable Functions 125 | 126 | * If you drop all the files in at once, there's a race condition when the events fired from Event Grid hit the top-level Orchestrator endpoint: it doesn't execute `StartNewAsync` fast enough, so instead of one instance per batch you'll end up with multiple instances for the same prefix (even though we want one instance per prefix, acting as a singleton). One common mitigation is sketched below. 127 | 
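That mitigation (not implemented in this sample) is to derive a deterministic orchestration instance id from the batch prefix and check its status before starting. A sketch, assuming the Durable Functions 2.x client API; all names here are illustrative:

```csharp
using System.Threading.Tasks;
using Microsoft.Azure.WebJobs.Extensions.DurableTask;

static class SingletonStartSketch
{
    // 'starter' would come from a [DurableClient] binding parameter.
    public static async Task RouteEventAsync(
        IDurableOrchestrationClient starter, string batchPrefix, string filename, object eventGridData)
    {
        var instanceId = $@"batch_{batchPrefix}"; // deterministic: one orchestration per prefix

        var existing = await starter.GetStatusAsync(instanceId);
        var notRunning = existing == null
            || existing.RuntimeStatus == OrchestrationRuntimeStatus.Completed
            || existing.RuntimeStatus == OrchestrationRuntimeStatus.Failed
            || existing.RuntimeStatus == OrchestrationRuntimeStatus.Terminated;

        if (notRunning)
        {
            await starter.StartNewAsync(@"EnsureAllFiles", instanceId, eventGridData);
        }
        else
        {
            // Already running for this prefix: just hand the instance the new file.
            await starter.RaiseEventAsync(instanceId, @"newfile", filename);
        }
    }
}
```

There's still a small window between the status check and `StartNewAsync`, so this narrows the race rather than eliminating it; the `AzureFunctions.v3.DurableEntities` project shows another angle on the problem, since operations against a given entity key are serialized.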
128 | ## Contributing 129 | 130 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 131 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 132 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 133 | 134 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 135 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 136 | provided by the bot. You will only need to do this once across all repos using our CLA. 137 | 138 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 139 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 140 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 141 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [many more](https://opensource.microsoft.com/). 
6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /images/ngroksubscription.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/Serverless-File-Validation/412bb343d803b4f2afc3edfe2b1fb15da300bde5/images/ngroksubscription.png -------------------------------------------------------------------------------- /sampledata/cust1_20171010_1112_type1.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem" 2 | "lorem","lorem","","" 3 | "lorem","lorem","","" 4 | "lorem","lorem","","" 5 | "lorem","lorem","","" -------------------------------------------------------------------------------- /sampledata/cust1_20171010_1112_type10.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem" 2 | "lorem","lorem","lorem" 3 | "lorem","lorem","lorem" 4 | "lorem","lorem","lorem" 5 | "lorem","lorem","lorem" 6 | "lorem","lorem","lorem" 7 | "lorem","lorem","lorem" 8 | "lorem","lorem","lorem" -------------------------------------------------------------------------------- /sampledata/cust1_20171010_1112_type2.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem" 2 | "lorem","lorem","","" 3 | "lorem","lorem","","" 4 | "lorem","lorem","","" 5 | "lorem","lorem","","" 6 | "lorem","lorem","","" -------------------------------------------------------------------------------- /sampledata/cust1_20171010_1112_type3.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 2 | "lorem","","","lorem","lorem","","","","","","","","","" -------------------------------------------------------------------------------- /sampledata/cust1_20171010_1112_type4.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem" 2 | "lorem","lorem","" 3 | "lorem","lorem","" -------------------------------------------------------------------------------- /sampledata/cust1_20171010_1112_type5.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem" 2 | "lorem","lorem" -------------------------------------------------------------------------------- /sampledata/cust1_20171010_1112_type7.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 2 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 3 | "lorem","lorem","lorem","lorem","","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 4 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 5 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 6 | 
"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" -------------------------------------------------------------------------------- /sampledata/cust1_20171010_1112_type8.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 2 | "lorem","lorem","","","","lorem","","","","","","","","","","","","","","","lorem" 3 | "lorem","lorem","","","","lorem","","","","","","","","","","","","","","","lorem" -------------------------------------------------------------------------------- /sampledata/cust1_20171010_1112_type9.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 2 | "lorem","","","lorem","lorem","","","","","","","","","" -------------------------------------------------------------------------------- /sampledata/cust1_20171011_1112_type1.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem" 2 | "lorem","lorem","","" 3 | "lorem","lorem","","" 4 | "lorem","lorem","","" 5 | "lorem","lorem","","" -------------------------------------------------------------------------------- /sampledata/cust1_20171011_1112_type10.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem" 2 | "lorem","lorem","lorem" 3 | "lorem","lorem","lorem" 4 | "lorem","lorem","lorem" 5 | "lorem","lorem","lorem" 6 | "lorem","lorem","lorem" 7 | "lorem","lorem","lorem" 8 | "lorem","lorem","lorem" -------------------------------------------------------------------------------- /sampledata/cust1_20171011_1112_type2.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem" 2 | "lorem","lorem","","" 3 | "lorem","lorem","","" 4 | "lorem","lorem","","" 5 | "lorem","lorem","","" 6 | "lorem","lorem","","" -------------------------------------------------------------------------------- /sampledata/cust1_20171011_1112_type3.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 2 | "lorem","","","lorem","lorem","","","","","","","","","" -------------------------------------------------------------------------------- /sampledata/cust1_20171011_1112_type4.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem" 2 | "lorem","lorem","" 3 | "lorem","lorem","" -------------------------------------------------------------------------------- /sampledata/cust1_20171011_1112_type5.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem" 2 | "lorem","lorem" -------------------------------------------------------------------------------- /sampledata/cust1_20171011_1112_type7.csv: -------------------------------------------------------------------------------- 1 | 
"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 2 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 3 | "lorem","lorem","lorem","lorem","","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 4 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 5 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 6 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" -------------------------------------------------------------------------------- /sampledata/cust1_20171011_1112_type8.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" 2 | "lorem","lorem","","","","lorem","","","","","","","","","","","","","","","lorem" 3 | "lorem","lorem","","","","lorem","","","","","","","","","","","","","","","lorem" -------------------------------------------------------------------------------- /sampledata/cust1_20171011_1112_type9.csv: -------------------------------------------------------------------------------- 1 | "lorem","lorem","lorem","lorem","lorem" 2 | "lorem","","","lorem","lorem" --------------------------------------------------------------------------------