├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── LICENSE-CODE ├── README.md ├── SECURITY.md ├── azure-data-pipeline ├── data │ └── sample.csv ├── data_pipeline_ci_cd.yml ├── data_pipeline_test.yml ├── factorydata │ ├── arm-template-parameters-definition.json │ ├── dataset │ │ ├── PreparedDataset.json │ │ └── RawDataset.json │ ├── linkedService │ │ ├── AzureBlobStorage1.json │ │ ├── AzureDatabricks1.json │ │ └── AzureKeyVault1.json │ └── pipeline │ │ └── DataPipeline.json ├── notebooks │ ├── traindata.py │ └── transformData.py └── scripts │ ├── Invoke-ADFPipeline.ps1 │ └── Utilities.psm1 └── scripts ├── Invoke-ADFPipeline.ps1 └── Utilities.psm1 /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Aa][Rr][Mm]/ 27 | [Aa][Rr][Mm]64/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | [Ll]ogs/ 33 | 34 | # Visual Studio 2015/2017 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # Visual Studio 2017 auto generated files 40 | Generated\ Files/ 41 | 42 | # MSTest test Results 43 | [Tt]est[Rr]esult*/ 44 | [Bb]uild[Ll]og.* 45 | 46 | # NUnit 47 | *.VisualState.xml 48 | TestResult.xml 49 | nunit-*.xml 50 | 51 | # Build Results of an ATL Project 52 | [Dd]ebugPS/ 53 | [Rr]eleasePS/ 54 | dlldata.c 55 | 56 | # Benchmark Results 57 | BenchmarkDotNet.Artifacts/ 58 | 59 | 
# .NET Core 60 | project.lock.json 61 | project.fragment.lock.json 62 | artifacts/ 63 | 64 | # StyleCop 65 | StyleCopReport.xml 66 | 67 | # Files built by Visual Studio 68 | *_i.c 69 | *_p.c 70 | *_h.h 71 | *.ilk 72 | *.meta 73 | *.obj 74 | *.iobj 75 | *.pch 76 | *.pdb 77 | *.ipdb 78 | *.pgc 79 | *.pgd 80 | *.rsp 81 | *.sbr 82 | *.tlb 83 | *.tli 84 | *.tlh 85 | *.tmp 86 | *.tmp_proj 87 | *_wpftmp.csproj 88 | *.log 89 | *.vspscc 90 | *.vssscc 91 | .builds 92 | *.pidb 93 | *.svclog 94 | *.scc 95 | 96 | # Chutzpah Test files 97 | _Chutzpah* 98 | 99 | # Visual C++ cache files 100 | ipch/ 101 | *.aps 102 | *.ncb 103 | *.opendb 104 | *.opensdf 105 | *.sdf 106 | *.cachefile 107 | *.VC.db 108 | *.VC.VC.opendb 109 | 110 | # Visual Studio profiler 111 | *.psess 112 | *.vsp 113 | *.vspx 114 | *.sap 115 | 116 | # Visual Studio Trace Files 117 | *.e2e 118 | 119 | # TFS 2012 Local Workspace 120 | $tf/ 121 | 122 | # Guidance Automation Toolkit 123 | *.gpState 124 | 125 | # ReSharper is a .NET coding add-in 126 | _ReSharper*/ 127 | *.[Rr]e[Ss]harper 128 | *.DotSettings.user 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web 
Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # NuGet Symbol Packages 188 | *.snupkg 189 | # The packages folder can be ignored because of Package Restore 190 | **/[Pp]ackages/* 191 | # except build/, which is used as an MSBuild target. 192 | !**/[Pp]ackages/build/ 193 | # Uncomment if necessary however generally it will be regenerated when needed 194 | #!**/[Pp]ackages/repositories.config 195 | # NuGet v3's project.json files produces more ignorable files 196 | *.nuget.props 197 | *.nuget.targets 198 | 199 | # Microsoft Azure Build Output 200 | csx/ 201 | *.build.csdef 202 | 203 | # Microsoft Azure Emulator 204 | ecf/ 205 | rcf/ 206 | 207 | # Windows Store app package directories and files 208 | AppPackages/ 209 | BundleArtifacts/ 210 | Package.StoreAssociation.xml 211 | _pkginfo.txt 212 | *.appx 213 | *.appxbundle 214 | *.appxupload 215 | 216 | # Visual Studio cache files 217 | # files ending in .cache can be ignored 218 | *.[Cc]ache 219 | # but keep track of directories ending in .cache 220 | !?*.[Cc]ache/ 221 | 222 | # Others 223 | ClientBin/ 224 | ~$* 225 | *~ 226 | *.dbmdl 227 | *.dbproj.schemaview 228 | *.jfm 229 | *.pfx 230 | *.publishsettings 231 | orleans.codegen.cs 232 | 233 | # Including strong name files can present a security risk 234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 235 | #*.snk 236 | 237 | # Since there are multiple workflows, uncomment next line to ignore bower_components 238 | # 
(https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 239 | #bower_components/ 240 | 241 | # RIA/Silverlight projects 242 | Generated_Code/ 243 | 244 | # Backup & report files from converting an old project file 245 | # to a newer Visual Studio version. Backup files are not needed, 246 | # because we have git ;-) 247 | _UpgradeReport_Files/ 248 | Backup*/ 249 | UpgradeLog*.XML 250 | UpgradeLog*.htm 251 | ServiceFabricBackup/ 252 | *.rptproj.bak 253 | 254 | # SQL Server files 255 | *.mdf 256 | *.ldf 257 | *.ndf 258 | 259 | # Business Intelligence projects 260 | *.rdl.data 261 | *.bim.layout 262 | *.bim_*.settings 263 | *.rptproj.rsuser 264 | *- [Bb]ackup.rdl 265 | *- [Bb]ackup ([0-9]).rdl 266 | *- [Bb]ackup ([0-9][0-9]).rdl 267 | 268 | # Microsoft Fakes 269 | FakesAssemblies/ 270 | 271 | # GhostDoc plugin setting file 272 | *.GhostDoc.xml 273 | 274 | # Node.js Tools for Visual Studio 275 | .ntvs_analysis.dat 276 | node_modules/ 277 | 278 | # Visual Studio 6 build log 279 | *.plg 280 | 281 | # Visual Studio 6 workspace options file 282 | *.opt 283 | 284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
285 | *.vbw 286 | 287 | # Visual Studio LightSwitch build output 288 | **/*.HTMLClient/GeneratedArtifacts 289 | **/*.DesktopClient/GeneratedArtifacts 290 | **/*.DesktopClient/ModelManifest.xml 291 | **/*.Server/GeneratedArtifacts 292 | **/*.Server/ModelManifest.xml 293 | _Pvt_Extensions 294 | 295 | # Paket dependency manager 296 | .paket/paket.exe 297 | paket-files/ 298 | 299 | # FAKE - F# Make 300 | .fake/ 301 | 302 | # CodeRush personal settings 303 | .cr/personal 304 | 305 | # Python Tools for Visual Studio (PTVS) 306 | __pycache__/ 307 | *.pyc 308 | 309 | # Cake - Uncomment if you are using it 310 | # tools/** 311 | # !tools/packages.config 312 | 313 | # Tabs Studio 314 | *.tss 315 | 316 | # Telerik's JustMock configuration file 317 | *.jmconfig 318 | 319 | # BizTalk build output 320 | *.btp.cs 321 | *.btm.cs 322 | *.odx.cs 323 | *.xsd.cs 324 | 325 | # OpenCover UI analysis results 326 | OpenCover/ 327 | 328 | # Azure Stream Analytics local run output 329 | ASALocalRun/ 330 | 331 | # MSBuild Binary and Structured Log 332 | *.binlog 333 | 334 | # NVidia Nsight GPU debugger configuration file 335 | *.nvuser 336 | 337 | # MFractors (Xamarin productivity tool) working folder 338 | .mfractor/ 339 | 340 | # Local History for Visual Studio 341 | .localhistory/ 342 | 343 | # BeatPulse healthcheck temp database 344 | healthchecksdb 345 | 346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 347 | MigrationBackup/ 348 | 349 | # Ionide (cross platform F# VS Code tools) working folder 350 | .ionide/ 351 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. 
Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). 
To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. 
Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. 
Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. 
You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. 
identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. 
if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. 
WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 
353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public 379 | licenses. Notwithstanding, Creative Commons may elect to apply one of 380 | its public licenses to material it publishes and in those instances 381 | will be considered the “Licensor.” The text of the Creative Commons 382 | public licenses is dedicated to the public domain under the CC0 Public 383 | Domain Dedication. 
Except for the limited purpose of indicating that 384 | material is shared under a Creative Commons public license or as 385 | otherwise permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the 393 | public licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. -------------------------------------------------------------------------------- /LICENSE-CODE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributing 3 | 4 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 5 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 6 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 7 | 8 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 9 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 10 | provided by the bot. You will only need to do this once across all repos using our CLA. 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 14 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 15 | 16 | # Legal Notices 17 | 18 | Microsoft and any contributors grant you a license to the Microsoft documentation and other content 19 | in this repository under the [Creative Commons Attribution 4.0 International Public License](https://creativecommons.org/licenses/by/4.0/legalcode), 20 | see the [LICENSE](LICENSE) file, and grant you a license to any code in the repository under the [MIT License](https://opensource.org/licenses/MIT), see the 21 | [LICENSE-CODE](LICENSE-CODE) file. 
22 | 23 | Microsoft, Windows, Microsoft Azure and/or other Microsoft products and services referenced in the documentation 24 | may be either trademarks or registered trademarks of Microsoft in the United States and/or other countries. 25 | The licenses for this project do not grant you rights to use any Microsoft names, logos, or trademarks. 26 | Microsoft's general trademark guidelines can be found at http://go.microsoft.com/fwlink/?LinkID=254653. 27 | 28 | Privacy information can be found at https://privacy.microsoft.com/en-us/ 29 | 30 | Microsoft and any contributors reserve all other rights, whether under their respective copyrights, patents, 31 | or trademarks, whether by implication, estoppel or otherwise. 32 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 
14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 
40 | 41 | -------------------------------------------------------------------------------- /azure-data-pipeline/data_pipeline_ci_cd.yml: -------------------------------------------------------------------------------- 1 | name: CICD 2 | pr: 3 | branches: 4 | include: 5 | - master 6 | - adf_publish 7 | trigger: 8 | branches: 9 | include: 10 | - master 11 | paths: 12 | include: 13 | - scripts/ 14 | 15 | variables: 16 | - group: datapipeline-vg 17 | - group: keys-vg 18 | pool: 19 | vmImage: ubuntu-latest 20 | 21 | 22 | stages: 23 | - stage: 'CI' 24 | displayName: 'CI' 25 | jobs: 26 | - job: "CI_Job" 27 | displayName: "CI Job" 28 | # The CI stage produces two artifacts (notebooks and ADF pipelines). 29 | # The pipelines Azure Resource Manager templates are stored in a technical branch "adf_publish" 30 | steps: 31 | - checkout: self 32 | - script: dir $(Build.SourcesDirectory)/$(Build.Repository.Name) 33 | - publish: $(Build.SourcesDirectory)/$(Build.Repository.Name)/azure-data-pipeline/notebooks 34 | artifact: notebooks 35 | - checkout: git://${{variables['System.TeamProject']}}@adf_publish 36 | - script: dir $(Build.SourcesDirectory)/$(Build.Repository.Name) 37 | - publish: $(Build.SourcesDirectory)/$(Build.Repository.Name)/$(DATA_FACTORY_DEV_NAME) 38 | artifact: adf-pipelines 39 | - stage: 'CD' 40 | displayName: 'CD' 41 | jobs: 42 | - deployment: "Deploy_to_Databricks" 43 | displayName: 'Deploy to Databricks' 44 | timeoutInMinutes: 0 45 | environment: qa 46 | strategy: 47 | runOnce: 48 | deploy: 49 | steps: 50 | - task: UsePythonVersion@0 51 | inputs: 52 | versionSpec: '3.x' 53 | addToPath: true 54 | architecture: 'x64' 55 | displayName: 'Use Python3' 56 | # Need to install DevOps for Azure Databricks extension 57 | - task: configuredatabricks@0 58 | inputs: 59 | url: '$(DATABRICKS_URL)' 60 | token: '$(databricks-token)' 61 | displayName: 'Configure Databricks CLI' 62 | 63 | - task: deploynotebooks@0 64 | inputs: 65 | notebooksFolderPath: 
'$(Pipeline.Workspace)/notebooks' 66 | workspaceFolder: '/Shared' 67 | displayName: 'Deploy (copy) data processing notebook to the Databricks cluster' 68 | - deployment: "Deploy_to_ADF" 69 | displayName: 'Deploy to ADF' 70 | timeoutInMinutes: 0 71 | environment: qa 72 | strategy: 73 | runOnce: 74 | deploy: 75 | steps: 76 | - task: AzureResourceGroupDeployment@2 77 | displayName: 'Deploy ADF resources' 78 | inputs: 79 | azureSubscription: $(AZURE_RM_CONNECTION) 80 | resourceGroupName: $(RESOURCE_GROUP) 81 | location: $(LOCATION) 82 | csmFile: '$(Pipeline.Workspace)/adf-pipelines/ARMTemplateForFactory.json' 83 | csmParametersFile: '$(Pipeline.Workspace)/adf-pipelines/ARMTemplateParametersForFactory.json' 84 | overrideParameters: -factoryName "$(DATA_FACTORY_TEST_NAME)" 85 | -DataPipeline_properties_variables_storage_account_name_defaultValue "$(STORAGE_ACCOUNT_NAME)" 86 | -DataPipeline_properties_variables_storage_container_name_defaultValue "$(STORAGE_CONTAINER_NAME)" 87 | -------------------------------------------------------------------------------- /azure-data-pipeline/data_pipeline_test.yml: -------------------------------------------------------------------------------- 1 | name: Test pipeline 2 | pr: none 3 | trigger: 4 | branches: 5 | include: 6 | - master 7 | paths: 8 | include: 9 | - scripts/ 10 | 11 | variables: 12 | - group: datapipeline-vg 13 | pool: 14 | vmImage: ubuntu-latest 15 | 16 | 17 | stages: 18 | - stage: 'Test' 19 | displayName: 'Test' 20 | jobs: 21 | - job: "Integration_test_job" 22 | displayName: "Integration test job" 23 | #dependsOn: [Deploy_to_Databricks, Deploy_to_ADF] 24 | pool: 25 | vmImage: 'ubuntu-latest' 26 | timeoutInMinutes: 0 27 | steps: 28 | - task: AzurePowerShell@4 29 | displayName: 'Execute ADF Pipeline' 30 | inputs: 31 | azureSubscription: $(AZURE_RM_CONNECTION) 32 | ScriptPath: '$(Build.SourcesDirectory)/azure-data-pipeline/scripts/Invoke-ADFPipeline.ps1' 33 | ScriptArguments: '-ResourceGroupName $(RESOURCE_GROUP) 
-DataFactoryName $(DATA_FACTORY_TEST_NAME) -PipelineName $(ADF_PIPELINE_NAME)' 34 | azurePowerShellVersion: LatestVersion -------------------------------------------------------------------------------- /azure-data-pipeline/factorydata/arm-template-parameters-definition.json: -------------------------------------------------------------------------------- 1 | { 2 | "Microsoft.DataFactory/factories/pipelines": { 3 | "properties": { 4 | "variables": { 5 | "*": { 6 | "defaultValue": "=" 7 | } 8 | } 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /azure-data-pipeline/factorydata/dataset/PreparedDataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PreparedDataset", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "AzureBlobStorage1", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobStorageLocation", 13 | "container": "prepareddata" 14 | }, 15 | "columnDelimiter": ",", 16 | "escapeChar": "\\", 17 | "quoteChar": "\"" 18 | }, 19 | "schema": [] 20 | }, 21 | "type": "Microsoft.DataFactory/factories/datasets" 22 | } -------------------------------------------------------------------------------- /azure-data-pipeline/factorydata/dataset/RawDataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "RawDataset", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "AzureBlobStorage1", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobStorageLocation", 13 | "container": "rawdata" 14 | }, 15 | "columnDelimiter": ",", 16 | "escapeChar": "\\", 17 | "quoteChar": "\"" 18 | }, 19 | "schema": [ 20 | { 21 | "type": "String" 22 | }, 23 | { 24 | "type": "String" 25 | 
}, 26 | { 27 | "type": "String" 28 | }, 29 | { 30 | "type": "String" 31 | }, 32 | { 33 | "type": "String" 34 | }, 35 | { 36 | "type": "String" 37 | }, 38 | { 39 | "type": "String" 40 | }, 41 | { 42 | "type": "String" 43 | }, 44 | { 45 | "type": "String" 46 | }, 47 | { 48 | "type": "String" 49 | }, 50 | { 51 | "type": "String" 52 | }, 53 | { 54 | "type": "String" 55 | }, 56 | { 57 | "type": "String" 58 | }, 59 | { 60 | "type": "String" 61 | }, 62 | { 63 | "type": "String" 64 | }, 65 | { 66 | "type": "String" 67 | }, 68 | { 69 | "type": "String" 70 | }, 71 | { 72 | "type": "String" 73 | }, 74 | { 75 | "type": "String" 76 | }, 77 | { 78 | "type": "String" 79 | }, 80 | { 81 | "type": "String" 82 | }, 83 | { 84 | "type": "String" 85 | }, 86 | { 87 | "type": "String" 88 | }, 89 | { 90 | "type": "String" 91 | }, 92 | { 93 | "type": "String" 94 | }, 95 | { 96 | "type": "String" 97 | }, 98 | { 99 | "type": "String" 100 | }, 101 | { 102 | "type": "String" 103 | }, 104 | { 105 | "type": "String" 106 | }, 107 | { 108 | "type": "String" 109 | }, 110 | { 111 | "type": "String" 112 | }, 113 | { 114 | "type": "String" 115 | }, 116 | { 117 | "type": "String" 118 | }, 119 | { 120 | "type": "String" 121 | }, 122 | { 123 | "type": "String" 124 | }, 125 | { 126 | "type": "String" 127 | }, 128 | { 129 | "type": "String" 130 | }, 131 | { 132 | "type": "String" 133 | }, 134 | { 135 | "type": "String" 136 | }, 137 | { 138 | "type": "String" 139 | }, 140 | { 141 | "type": "String" 142 | }, 143 | { 144 | "type": "String" 145 | }, 146 | { 147 | "type": "String" 148 | }, 149 | { 150 | "type": "String" 151 | }, 152 | { 153 | "type": "String" 154 | }, 155 | { 156 | "type": "String" 157 | }, 158 | { 159 | "type": "String" 160 | }, 161 | { 162 | "type": "String" 163 | }, 164 | { 165 | "type": "String" 166 | }, 167 | { 168 | "type": "String" 169 | }, 170 | { 171 | "type": "String" 172 | }, 173 | { 174 | "type": "String" 175 | }, 176 | { 177 | "type": "String" 178 | }, 179 | { 180 | "type": "String" 
181 | }, 182 | { 183 | "type": "String" 184 | }, 185 | { 186 | "type": "String" 187 | }, 188 | { 189 | "type": "String" 190 | }, 191 | { 192 | "type": "String" 193 | }, 194 | { 195 | "type": "String" 196 | }, 197 | { 198 | "type": "String" 199 | }, 200 | { 201 | "type": "String" 202 | }, 203 | { 204 | "type": "String" 205 | }, 206 | { 207 | "type": "String" 208 | }, 209 | { 210 | "type": "String" 211 | }, 212 | { 213 | "type": "String" 214 | } 215 | ] 216 | }, 217 | "type": "Microsoft.DataFactory/factories/datasets" 218 | } -------------------------------------------------------------------------------- /azure-data-pipeline/factorydata/linkedService/AzureBlobStorage1.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "AzureBlobStorage1", 3 | "type": "Microsoft.DataFactory/factories/linkedservices", 4 | "properties": { 5 | "annotations": [], 6 | "type": "AzureBlobStorage", 7 | "typeProperties": { 8 | "connectionString": { 9 | "type": "AzureKeyVaultSecret", 10 | "store": { 11 | "referenceName": "AzureKeyVault1", 12 | "type": "LinkedServiceReference" 13 | }, 14 | "secretName": "StorageConnectString" 15 | } 16 | } 17 | } 18 | } -------------------------------------------------------------------------------- /azure-data-pipeline/factorydata/linkedService/AzureDatabricks1.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "AzureDatabricks1", 3 | "properties": { 4 | "annotations": [], 5 | "type": "AzureDatabricks", 6 | "typeProperties": { 7 | "domain": "https://adb-1003029183931884.4.azuredatabricks.net", 8 | "accessToken": { 9 | "type": "AzureKeyVaultSecret", 10 | "store": { 11 | "referenceName": "AzureKeyVault1", 12 | "type": "LinkedServiceReference" 13 | }, 14 | "secretName": "databricks-token" 15 | }, 16 | "existingClusterId": "0515-102917-bonus843" 17 | } 18 | }, 19 | "type": "Microsoft.DataFactory/factories/linkedservices" 20 | } 
-------------------------------------------------------------------------------- /azure-data-pipeline/factorydata/linkedService/AzureKeyVault1.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "AzureKeyVault1", 3 | "type": "Microsoft.DataFactory/factories/linkedservices", 4 | "properties": { 5 | "annotations": [], 6 | "type": "AzureKeyVault", 7 | "typeProperties": { 8 | "baseUrl": "https://yourbase.vault.azure.net/" 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /azure-data-pipeline/factorydata/pipeline/DataPipeline.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "DataPipeline", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "TransformData", 7 | "type": "DatabricksNotebook", 8 | "dependsOn": [ 9 | { 10 | "activity": "CopyData", 11 | "dependencyConditions": [ 12 | "Succeeded" 13 | ] 14 | } 15 | ], 16 | "policy": { 17 | "timeout": "7.00:00:00", 18 | "retry": 0, 19 | "retryIntervalInSeconds": 30, 20 | "secureOutput": false, 21 | "secureInput": false 22 | }, 23 | "userProperties": [], 24 | "typeProperties": { 25 | "notebookPath": "/Shared/transformData", 26 | "baseParameters": { 27 | "input": { 28 | "value": "@pipeline().parameters.datafile", 29 | "type": "Expression" 30 | }, 31 | "storage_account_name": { 32 | "value": "@variables('storage_account_name')", 33 | "type": "Expression" 34 | }, 35 | "storage_container_name": { 36 | "value": "@variables('storage_container_name')", 37 | "type": "Expression" 38 | } 39 | } 40 | }, 41 | "linkedServiceName": { 42 | "referenceName": "AzureDatabricks1", 43 | "type": "LinkedServiceReference" 44 | } 45 | }, 46 | { 47 | "name": "CopyData", 48 | "type": "Copy", 49 | "dependsOn": [], 50 | "policy": { 51 | "timeout": "7.00:00:00", 52 | "retry": 0, 53 | "retryIntervalInSeconds": 30, 54 | "secureOutput": false, 55 | "secureInput": false 56 | }, 57 | 
"userProperties": [], 58 | "typeProperties": { 59 | "source": { 60 | "type": "DelimitedTextSource", 61 | "storeSettings": { 62 | "type": "AzureBlobStorageReadSettings", 63 | "recursive": true, 64 | "wildcardFileName": "*.csv", 65 | "enablePartitionDiscovery": false 66 | }, 67 | "formatSettings": { 68 | "type": "DelimitedTextReadSettings" 69 | } 70 | }, 71 | "sink": { 72 | "type": "DelimitedTextSink", 73 | "storeSettings": { 74 | "type": "AzureBlobStorageWriteSettings", 75 | "copyBehavior": "PreserveHierarchy" 76 | }, 77 | "formatSettings": { 78 | "type": "DelimitedTextWriteSettings", 79 | "quoteAllText": true, 80 | "fileExtension": ".txt" 81 | } 82 | }, 83 | "enableStaging": false 84 | }, 85 | "inputs": [ 86 | { 87 | "referenceName": "RawDataset", 88 | "type": "DatasetReference" 89 | } 90 | ], 91 | "outputs": [ 92 | { 93 | "referenceName": "PreparedDataset", 94 | "type": "DatasetReference" 95 | } 96 | ] 97 | }, 98 | { 99 | "name": "TrainModel", 100 | "type": "DatabricksNotebook", 101 | "dependsOn": [ 102 | { 103 | "activity": "TransformData", 104 | "dependencyConditions": [ 105 | "Succeeded" 106 | ] 107 | } 108 | ], 109 | "policy": { 110 | "timeout": "7.00:00:00", 111 | "retry": 0, 112 | "retryIntervalInSeconds": 30, 113 | "secureOutput": false, 114 | "secureInput": false 115 | }, 116 | "userProperties": [], 117 | "typeProperties": { 118 | "notebookPath": "/Shared/traindata", 119 | "baseParameters": { 120 | "input": { 121 | "value": "@pipeline().parameters.datafile", 122 | "type": "Expression" 123 | }, 124 | "storage_account_name": { 125 | "value": "@variables('storage_account_name')", 126 | "type": "Expression" 127 | }, 128 | "storage_container_name": { 129 | "value": "@variables('storage_container_name')", 130 | "type": "Expression" 131 | } 132 | } 133 | }, 134 | "linkedServiceName": { 135 | "referenceName": "AzureDatabricks1", 136 | "type": "LinkedServiceReference" 137 | } 138 | } 139 | ], 140 | "parameters": { 141 | "datafile": { 142 | "type": "string", 143 | 
"defaultValue": "sample.csv" 144 | } 145 | }, 146 | "variables": { 147 | "storage_account_name": { 148 | "type": "String", 149 | "defaultValue": "yourstorageaccount" 150 | }, 151 | "storage_container_name": { 152 | "type": "String", 153 | "defaultValue": "prepareddata" 154 | } 155 | }, 156 | "annotations": [] 157 | }, 158 | "type": "Microsoft.DataFactory/factories/pipelines" 159 | } -------------------------------------------------------------------------------- /azure-data-pipeline/notebooks/traindata.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | import pickle 3 | import pandas as pd 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import seaborn as seabornInstance 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.linear_model import LinearRegression 9 | from sklearn import metrics 10 | 11 | #dbutils.widgets.text("input", "","") 12 | #datafile = dbutils.widgets.get("input") 13 | datafile = "transformed.csv" 14 | storage_account_name = getArgument("storage_account_name") 15 | storage_container_name = getArgument("storage_container_name") 16 | 17 | mount_point = "/mnt/prepared" 18 | if not any(mount.mountPoint == mount_point for mount in dbutils.fs.mounts()): 19 | dbutils.fs.mount( 20 | source = "wasbs://"+storage_container_name+"@"+storage_account_name+".blob.core.windows.net", 21 | mount_point = mount_point, 22 | extra_configs = {"fs.azure.account.key."+storage_account_name+".blob.core.windows.net":dbutils.secrets.get(scope = "testscope", key = "StorageKey")}) 23 | 24 | dataset = pd.read_csv("/dbfs/"+mount_point+"/"+datafile) 25 | X = dataset['MinTemp'].values.reshape(-1,1) 26 | y = dataset['MaxTemp'].values.reshape(-1,1) 27 | 28 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 29 | regressor = LinearRegression() 30 | regressor.fit(X_train, y_train) 31 | print("Model trained.") 32 | 33 | #To retrieve the 
intercept: 34 | print("Regressor intercept: %f" % np.ravel(regressor.intercept_)[0]) # y was reshaped to 2-D, so intercept_/coef_ are arrays: pull out the single scalar before %f formatting 35 | #For retrieving the slope: 36 | print("Regressor coef: %f" % np.ravel(regressor.coef_)[0]) 37 | 38 | filepath_to_save = '/dbfs' + mount_point + '/regression.pkl' 39 | # Persist the fitted model; the context manager guarantees the file is flushed and closed even if pickling fails. 40 | with open(filepath_to_save, "wb") as model_file: 41 | pickle.dump(regressor, model_file) 42 | 43 | # COMMAND ---------- 44 | 45 | -------------------------------------------------------------------------------- /azure-data-pipeline/notebooks/transformData.py: -------------------------------------------------------------------------------- 1 | # get storage account name, container name from input 2 | dbutils.widgets.text("input", "","") 3 | datafile = dbutils.widgets.get("input") 4 | storage_account_name = getArgument("storage_account_name") 5 | storage_container_name = getArgument("storage_container_name") 6 | 7 | # mount the blob storage that represents the target data 8 | mount_point = "/mnt/prepared" 9 | if not any(mount.mountPoint == mount_point for mount in dbutils.fs.mounts()): 10 | dbutils.fs.mount( 11 | source = "wasbs://"+storage_container_name+"@"+storage_account_name+".blob.core.windows.net", 12 | mount_point = mount_point, 13 | extra_configs = {"fs.azure.account.key."+storage_account_name+".blob.core.windows.net":dbutils.secrets.get(scope = "testscope", key = "StorageKey")}) 14 | 15 | # read the files with column information 16 | df = spark.read.format("csv")\ 17 | .option("inferSchema", 'true')\ 18 | .option("header",'true') \ 19 | .load(mount_point+"/"+datafile) 20 | 21 | # transformation - keep only 2 features - MinTemp (minimum temperature) and MaxTemp (maximum temperature) - that are relevant for the training 22 | df = df.select('MinTemp','MaxTemp').dropna() 23 | 24 | # save the transformed file as "transformed.csv" 25 | filepath_to_save = '/dbfs' + mount_point + '/transformed.csv' 26 | df.toPandas().to_csv(filepath_to_save,mode = 'w', index=False) 27 | -------------------------------------------------------------------------------- 
/azure-data-pipeline/scripts/Invoke-ADFPipeline.ps1: -------------------------------------------------------------------------------- 1 | #[CmdletBinding()] 2 | param( 3 | [parameter(Mandatory=$true)] 4 | $ResourceGroupName, 5 | [parameter(Mandatory=$true)] 6 | $DataFactoryName, 7 | [parameter(Mandatory=$true)] 8 | $PipelineName 9 | ) 10 | # Load the logging helpers (LogInfo/LogError) that live next to this script; Join-Path keeps the path valid on Linux agents too. 11 | Import-Module (Join-Path $PSScriptRoot "Utilities.psm1") 12 | # Trigger the pipeline; the returned run id is what the polling calls below use. 13 | $runId = Invoke-AzDataFactoryV2Pipeline -ResourceGroupName $ResourceGroupName -DataFactoryName $DataFactoryName -PipelineName $PipelineName 14 | # Publish the run id as the Azure DevOps variable AdfPipeline.RunId. 15 | Write-Host "##vso[task.setvariable variable=AdfPipeline.RunId]$($runId)" 16 | 17 | $runDetails = Get-AzDataFactoryV2PipelineRun -ResourceGroupName $ResourceGroupName -DataFactoryName $DataFactoryName -PipelineRunId $runId 18 | # A freshly triggered run may still be "Queued"; keep polling until it has left both non-terminal states. 19 | while ($runDetails.Status -in @("Queued", "InProgress")) { 20 | LogInfo "Waiting for the ADF Pipeline $($runDetails.PipelineName) to finish..." 21 | 22 | Start-Sleep -Seconds 5 23 | 24 | $runDetails = Get-AzDataFactoryV2PipelineRun -ResourceGroupName $ResourceGroupName -DataFactoryName $DataFactoryName -PipelineRunId $runId 25 | } 26 | # Any final status other than "Succeeded" is reported and fails the calling task. 27 | if ($runDetails.Status -eq "Succeeded") { 28 | LogInfo "Data Factory Pipeline $($runDetails.PipelineName) succeeded!" 29 | } 30 | else { 31 | LogError "Data Factory Pipeline $($runDetails.PipelineName) failed: $($runDetails.Message)" 32 | throw "Error executing the ADF Pipeline. Check the message above for details." 
33 | } -------------------------------------------------------------------------------- /azure-data-pipeline/scripts/Utilities.psm1: -------------------------------------------------------------------------------- 1 | function LogInfo { 2 | param ($Text) 3 | # Info-level message, rendered in white. 4 | WriteToConsole -Text $Text -LogLevel "Info" -ForegroundColor White 5 | } 6 | 7 | function LogError { 8 | param ($Text) 9 | # Error-level message, rendered in red. 10 | WriteToConsole -Text $Text -LogLevel "Error" -ForegroundColor Red 11 | } 12 | 13 | function LogWarning { 14 | param ($Text) 15 | # Warn-level message, rendered in yellow. 16 | WriteToConsole -Text $Text -LogLevel "Warn" -ForegroundColor Yellow 17 | } 18 | 19 | function WriteToConsole { 20 | param ($Text, $LogLevel, $ForegroundColor) 21 | # Output format: [<current date/time>] [<level>]<TAB><text>, in the requested colour. 22 | Write-Host -ForegroundColor $ForegroundColor "[$(Get-Date)] [$($LogLevel)]`t$($Text)" 23 | } 24 | 25 | function Get-MDWResourceGroupName { 26 | param([parameter(Mandatory=$true)][string]$baseName) 27 | # Naming convention: <baseName>-RG 28 | $resourceGroupName = "$($baseName)-RG" 29 | 30 | return $resourceGroupName 31 | } 32 | 33 | function Get-MDWVnetName { 34 | param([parameter(Mandatory=$true)][string]$baseName) 35 | # Naming convention: <baseName>-VNet 36 | $vnetName = "$($baseName)-VNet" 37 | 38 | return $vnetName 39 | } 40 | 41 | function Get-MDWKeyVaultName { 42 | param( 43 | [parameter(Mandatory=$true)][string]$baseName, 44 | [bool]$encrypted=$false 45 | ) 46 | # Naming convention: <baseName>-KV, or <baseName>-KV-Enc when $encrypted is true. 47 | $keyVaultName = "" 48 | 49 | if(!$encrypted) { 50 | $keyVaultName = "$($baseName)-KV" 51 | } else { 52 | $keyVaultName = "$($baseName)-KV-Enc" 53 | } 54 | 55 | return $keyVaultName 56 | } 57 | 58 | function Get-MDWDataFactoryName{ 59 | param([parameter(Mandatory=$true)][string]$baseName) 60 | # Naming convention: <baseName>-ADF 61 | $dataFactoryName = "$($baseName)-ADF" 62 | 63 | return $dataFactoryName 64 | } 65 | 66 | function Get-MDWNetworkSecurityGroupName { 67 | param([parameter(Mandatory=$true)][string]$baseName) 68 | # Naming convention: <baseName>-ADB-NSG 69 | $nsgName = "$($baseName)-ADB-NSG" 70 | 71 | return $nsgName 72 | } -------------------------------------------------------------------------------- /scripts/Invoke-ADFPipeline.ps1: 
-------------------------------------------------------------------------------- 1 | #[CmdletBinding()] 2 | param( 3 | [parameter(Mandatory=$true)] 4 | $ResourceGroupName, 5 | [parameter(Mandatory=$true)] 6 | $DataFactoryName, 7 | [parameter(Mandatory=$true)] 8 | $PipelineName 9 | ) 10 | # Load the logging helpers (LogInfo/LogError) that live next to this script; Join-Path keeps the path valid on Linux agents too. 11 | Import-Module (Join-Path $PSScriptRoot "Utilities.psm1") 12 | # Trigger the pipeline; the returned run id is what the polling calls below use. 13 | $runId = Invoke-AzDataFactoryV2Pipeline -ResourceGroupName $ResourceGroupName -DataFactoryName $DataFactoryName -PipelineName $PipelineName 14 | # Publish the run id as the Azure DevOps variable AdfPipeline.RunId. 15 | Write-Host "##vso[task.setvariable variable=AdfPipeline.RunId]$($runId)" 16 | 17 | $runDetails = Get-AzDataFactoryV2PipelineRun -ResourceGroupName $ResourceGroupName -DataFactoryName $DataFactoryName -PipelineRunId $runId 18 | # A freshly triggered run may still be "Queued"; keep polling until it has left both non-terminal states. 19 | while ($runDetails.Status -in @("Queued", "InProgress")) { 20 | LogInfo "Waiting for the ADF Pipeline $($runDetails.PipelineName) to finish..." 21 | 22 | Start-Sleep -Seconds 5 23 | 24 | $runDetails = Get-AzDataFactoryV2PipelineRun -ResourceGroupName $ResourceGroupName -DataFactoryName $DataFactoryName -PipelineRunId $runId 25 | } 26 | # Any final status other than "Succeeded" is reported and fails the calling task. 27 | if ($runDetails.Status -eq "Succeeded") { 28 | LogInfo "Data Factory Pipeline $($runDetails.PipelineName) succeeded!" 29 | } 30 | else { 31 | LogError "Data Factory Pipeline $($runDetails.PipelineName) failed: $($runDetails.Message)" 32 | throw "Error executing the ADF Pipeline. Check the message above for details." 
33 | } -------------------------------------------------------------------------------- /scripts/Utilities.psm1: -------------------------------------------------------------------------------- 1 | function LogInfo { 2 | param ($Text) 3 | # Info-level message, rendered in white. 4 | WriteToConsole -Text $Text -LogLevel "Info" -ForegroundColor White 5 | } 6 | 7 | function LogError { 8 | param ($Text) 9 | # Error-level message, rendered in red. 10 | WriteToConsole -Text $Text -LogLevel "Error" -ForegroundColor Red 11 | } 12 | 13 | function LogWarning { 14 | param ($Text) 15 | # Warn-level message, rendered in yellow. 16 | WriteToConsole -Text $Text -LogLevel "Warn" -ForegroundColor Yellow 17 | } 18 | 19 | function WriteToConsole { 20 | param ($Text, $LogLevel, $ForegroundColor) 21 | # Output format: [<current date/time>] [<level>]<TAB><text>, in the requested colour. 22 | Write-Host -ForegroundColor $ForegroundColor "[$(Get-Date)] [$($LogLevel)]`t$($Text)" 23 | } 24 | 25 | function Get-MDWResourceGroupName { 26 | param([parameter(Mandatory=$true)][string]$baseName) 27 | # Naming convention: <baseName>-RG 28 | $resourceGroupName = "$($baseName)-RG" 29 | 30 | return $resourceGroupName 31 | } 32 | 33 | function Get-MDWVnetName { 34 | param([parameter(Mandatory=$true)][string]$baseName) 35 | # Naming convention: <baseName>-VNet 36 | $vnetName = "$($baseName)-VNet" 37 | 38 | return $vnetName 39 | } 40 | 41 | function Get-MDWKeyVaultName { 42 | param( 43 | [parameter(Mandatory=$true)][string]$baseName, 44 | [bool]$encrypted=$false 45 | ) 46 | # Naming convention: <baseName>-KV, or <baseName>-KV-Enc when $encrypted is true. 47 | $keyVaultName = "" 48 | 49 | if(!$encrypted) { 50 | $keyVaultName = "$($baseName)-KV" 51 | } else { 52 | $keyVaultName = "$($baseName)-KV-Enc" 53 | } 54 | 55 | return $keyVaultName 56 | } 57 | 58 | function Get-MDWDataFactoryName{ 59 | param([parameter(Mandatory=$true)][string]$baseName) 60 | # Naming convention: <baseName>-ADF 61 | $dataFactoryName = "$($baseName)-ADF" 62 | 63 | return $dataFactoryName 64 | } 65 | 66 | function Get-MDWNetworkSecurityGroupName { 67 | param([parameter(Mandatory=$true)][string]$baseName) 68 | # Naming convention: <baseName>-ADB-NSG 69 | $nsgName = "$($baseName)-ADB-NSG" 70 | 71 | return $nsgName 72 | } --------------------------------------------------------------------------------