├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── azure-pipelines ├── adf │ ├── adf-cd.yml │ └── templates │ │ ├── build.yml │ │ ├── deploy.yml │ │ └── test.yml ├── databricks │ ├── databricks-ci.yml │ ├── databricks-lib-cd.template.yml │ ├── databricks-lib-cd.yml │ ├── databricks-notebooks-cd.yml │ └── templates │ │ ├── databricks-auth-step-template.yml │ │ ├── databricks-deploy-library-job-template.yml │ │ ├── databricks-deploy-notebooks-job-template.yml │ │ └── databricks-setup-environment-template.yml ├── iac │ ├── iac-cd.yml │ ├── iac-ci.yml │ └── templates │ │ ├── stage.acceptance-test.yml │ │ ├── stage.deploy.yml │ │ ├── stage.plan.yml │ │ ├── stage.validate.yml │ │ ├── step.install-arm-template-toolkit.yml │ │ ├── step.install-azure-powershell.yml │ │ ├── step.install-databricks-cli.yml │ │ └── step.install-gitversion.yml ├── lib │ ├── lib-cd.yml │ ├── lib-ci.yml │ └── templates │ │ ├── lib-build-publish.yml │ │ └── lib-stage-environment.yml └── variable.environment.template.yml ├── data-platform ├── adf │ ├── arm-template-parameters-definition.json │ ├── dataset │ │ ├── AirportCodesSink.json │ │ ├── AirportCodesSource.json │ │ ├── FlightsDelaysSink.json │ │ ├── FlightsDelaysSource.json │ │ ├── FlightsWeatherSink.json │ │ └── FlightsWeatherSource.json │ ├── linkedService │ │ ├── ADLSLinkedService.json │ │ ├── AzureBlobStorageLinkedService.json │ │ ├── AzureDatabricksLinkedService.json │ │ └── AzureKeyVaultLinkedService.json │ ├── package.json │ ├── pipeline │ │ └── ProcessFlightsDelaysData.json │ └── publish_config.json ├── notebooks │ ├── .flake8 │ └── Shared │ │ └── DataOps │ │ ├── 00 - Temp.py │ │ ├── 01 ADLS Mount.py │ │ └── 02 One Notebook to Rule Them All.py └── src │ ├── bdd-adf-pipelines │ ├── .gitignore │ ├── .vscode │ │ ├── launch.json │ │ └── settings.json │ ├── README.md │ ├── core │ │ ├── __init__.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── service_principal.py │ │ └── services │ │ │ ├── __init__.py │ │ │ ├── adls.py │ │ │ └── datafactory.py │ ├── features │ │ ├── environment.py │ │ ├── flight_delays.feature │ │ └── steps │ │ │ ├── common.py │ │ │ └── flight_delays.py │ ├── requirements.txt │ └── scripts │ │ └── junit_converter.py │ └── dataopslib │ ├── CHANGELOG.md │ ├── MANIFEST.in │ ├── README.md │ ├── ReadmeLibExtraExercise.md │ ├── data │ ├── AirportCodeLocationLookupClean.csv │ ├── FlightDelaysWithAirportCodes.csv │ ├── FlightWeatherWithAirportCode.csv │ └── sample_data.csv │ ├── dataopslib │ ├── _init_.py │ ├── schemas.py │ └── spark │ │ ├── __init__.py │ │ ├── data_quality.py │ │ ├── data_transformation.py │ │ └── functions.py │ ├── pytest.ini │ ├── requirements.txt │ ├── samples │ ├── sample_has_inconsistent_dates.py │ ├── sample_has_null.py │ ├── sample_has_valid_dates.py │ ├── sample_read_csv.py │ └── sample_transformation.py │ ├── setup.cfg │ ├── setup.py │ ├── spark │ ├── README.md │ └── docker-compose.yml │ └── tests │ ├── test_data_quality.py │ └── test_data_transformation.py ├── hands-on-lab ├── HOL step-by-step - DevSquad in a Day.md ├── archival │ ├── HOL Exercise 2 only.md │ ├── HOL Exercise 3.md │ ├── HOL Exercise 4.md │ └── HOL step-by-step - DataOps.md ├── lab-files │ └── my-lab-file.json └── media │ ├── 02-One-Notebook-to-Rule-Them-All-1.png │ ├── 02-One-Notebook-to-Rule-Them-All-2.png │ ├── 02-One-Notebook-to-Rule-Them-All-3.png │ ├── 02-One-Notebook-to-Rule-Them-All-4.png │ ├── 
02-One-Notebook-to-Rule-Them-All-5.png │ ├── 89-git-repositories.png │ ├── 91-git-workflow.png │ ├── 92-git-workflow-databricks-notebooks.png │ ├── 92-git-workflow-library.png │ ├── 93-naming-conventions.png │ ├── 94-release-lifecycle.png │ ├── ADFPipelineRunning.png │ ├── CI-Iac.png │ ├── PRDEV2QA-1.png │ ├── PRDEV2QA-2.png │ ├── PRDEV2QA.png │ ├── Pipelines-ADF.png │ ├── Pipelines-Databricks.png │ ├── Pipelines-IaC.png │ ├── Pipelines-lib.png │ ├── Pipelines.png │ ├── RGComputeDev.png │ ├── RGDataDev.png │ ├── Run-CDPipeline-ADF.png │ ├── Run-CDPipeline-ADFGood.png │ ├── Run-CDPipeline-Databricks-Lib.png │ ├── Run-CDPipeline-Databricks-Notebooks.png │ ├── Run-CDPipeline-Iac.png │ ├── Run-CDPipeline-lib.png │ ├── Run-CIPipeline-Databricks.png │ ├── Run-CIPipeline-Iac.png │ ├── Run-CIPipeline-lib.png │ ├── SP-secret.png │ ├── Versionlib.png │ ├── adf-copy-data-blob-storage.png │ ├── adf-dataops-eastus2-dev-author.png │ ├── adf-dataops-eastus2-dev-overview.png │ ├── adf-dataops-eastus2-dev-process-data.png │ ├── adf-dataops-eastus2-dev-workspace1.png │ ├── airport-codes-source-csv.png │ ├── airport-codes-sync.png │ ├── alpbaVersionlib.png │ ├── behave-results.png │ ├── behave-script.png │ ├── betaVersionlib.png │ ├── branch-policies-builder.png │ ├── branch-policies-own-owner.png │ ├── compute-template-json.png │ ├── copy-airport-codes-sink.png │ ├── copy-airport-codes.png │ ├── copy-value-clientsecret.png │ ├── dbw-dataops-attaching-cluster.png │ ├── dbw-dataops-eastus2-dev-overview.png │ ├── dbw-dataops-eastus2-dev-ws.png │ ├── dbw-dataops-new-cluster.png │ ├── environments-DEV-Databricks-Notebooks.png │ ├── environments-DEV-Databricks.png │ ├── environments-qa-prod.png │ ├── environments.png │ ├── globaltable-flight_delays_view.png │ ├── high-level-overview-dataops.png │ ├── iac-ci.png │ ├── iac-file-corejson-databricks.png │ ├── iac-folder-databricks.png │ ├── iac-folder-infrastructure.png │ ├── iac-folder-linkedtemplates-subfolders.png │ ├── iac-folder-linkedtemplates.png │ ├── iac-folder-parameters.png │ ├── iac-folder-subfolder-tests.png │ ├── iac-linkedtemplates-template-compute.png │ ├── iac-ordem-scripts.png │ ├── iac-scripts.png │ ├── iac-service-principal.png │ ├── infrastructure-as-code-folder.png │ ├── lakedataopseastus2dev-airport-metadata.png │ ├── lakedataopseastus2dev-layer-landing.png │ ├── lakedataopseastus2dev-layers.png │ ├── lakedataopseastus2dev-overview.png │ ├── last-pipeline-run.png │ ├── mount-adls-1.png │ ├── mount-adls-2.png │ ├── notebook-01-adls-mount.png │ ├── notebook-01-adls-runcell.png │ ├── parameters-dev-json.png │ ├── pipeline-run-results.png │ ├── pipeline-stages-run.png │ ├── pipeline-trigger.png │ ├── rcVersionlib.png │ ├── resource-groups.png │ ├── rg-dataops-compute-dev.png │ ├── rg-dataops-data-dev.png │ ├── scope-dataops.png │ ├── select-test-yml.png │ ├── stgdataopseastus2dev-airport-metadata.png │ ├── stgdataopseastus2dev-containers.png │ ├── stgdataopseastus2dev.png │ ├── task03_01-library-workflow.png │ ├── task03_02-artifacts.png │ ├── task03_02-artifactsliboverview.png │ ├── task03_04-artifactsliboverview.png │ ├── task2_01-Exploring-Python-Custom-Libraries.png │ ├── task2_02-Exploring-Python-Custom-Libraries.png │ ├── task2_03-Exploring-Python-Custom-Libraries.png │ ├── task2_04-Exploring-Python-Custom-Libraries.png │ └── templates-folder.png ├── infrastructure-as-code ├── GitVersion.yml ├── databricks │ ├── dev │ │ └── interactive.json │ ├── prod │ │ └── interactive.json │ ├── qa │ │ └── interactive.json │ └── sandbox │ │ └── core.json 
├── infrastructure │ ├── azuredeploy.json │ ├── linkedTemplates │ │ ├── compute │ │ │ └── template.json │ │ ├── data │ │ │ └── template.json │ │ ├── ml │ │ │ └── template.json │ │ └── roleAssigments │ │ │ ├── compute.json │ │ │ └── data.json │ ├── parameters │ │ ├── parameters.dev.template.json │ │ ├── parameters.prod.template.json │ │ └── parameters.qa.template.json │ └── sample-data │ │ ├── AirportCodeLocationLookupClean.zip │ │ ├── FlightDelaysWithAirportCodes.zip │ │ └── FlightWeatherWithAirportCode.zip ├── scripts │ ├── AcceptanceTest.ps1 │ ├── DatabricksClusters.ps1 │ ├── DatabricksScopeCreation.ps1 │ ├── DatabricksSecrets.ps1 │ ├── Deploy.ps1 │ ├── Lint.ps1 │ ├── Plan.ps1 │ ├── PublishOutputs.ps1 │ ├── Sandbox.ps1 │ ├── Setup.ps1 │ ├── UpdateKeyVaultSecrets.ps1 │ └── UploadSampleData.ps1 └── tests │ ├── Compute │ ├── DataFactory.Tests.ps1 │ ├── Databricks.Tests.ps1 │ ├── KeyVault.Tests.ps1 │ ├── ResourceGroup.Tests.ps1 │ └── RoleAssigments.Tests.ps1 │ └── Data │ ├── DataLake.Tests.ps1 │ └── ResourceGroup.Tests.ps1 └── quickstart ├── .gitignore ├── README.md ├── configs ├── cloud-setup │ └── template.json └── dataops │ └── template.json ├── docs ├── 0b-prerequisites-advanced.md ├── 1b-create-hol-setup-file-advanced.md ├── 4-delete-resources.md └── images │ ├── ad-connect-directory.png │ ├── artifact-feed.png │ ├── azure-prereqs-script.png │ ├── create-artifact-feed.png │ ├── install-git-tools.png │ ├── open-cloud-powershell.png │ ├── open-org-settings.png │ ├── project-creation.png │ ├── quickstart-buildservice-1.png │ ├── quickstart-buildservice-2.png │ ├── quickstart-buildservice-3.png │ ├── vm-lab-rdp-connection.png │ └── vm-lab-reset-password.png ├── schemas ├── cloud-setup │ └── config.schema.1.0.0.json └── dataops │ └── config.schema.1.0.0.json └── scripts ├── cloud-setup ├── Delete-AzureResources.ps1 ├── Deploy-AzurePreReqs.ps1 ├── Replace-TemplateArgs.ps1 └── Validate-AzurePreReqs.ps1 ├── dataops ├── Deploy-AzureDevOps.ps1 └── Validate-AzureDevOps.ps1 ├── labvm └── Deploy-LabVM.ps1 └── modules ├── Azure.psm1 ├── AzureDevOps.psm1 ├── Common.psm1 ├── Logging.psm1 ├── RepoOperations.psm1 └── Validation.psm1 /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG] - {ISSUE TITLE}" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEAT] - {ISSUE TITLE}" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | quickstart/configs/cloud-setup/hol.json 2 | quickstart/outputs/hol.json 3 | .DS_Store 4 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to DevSquad In a Day 2 | 3 | Welcome, and thank you for your interest in contributing to our workshop! 4 | 5 | There are many ways that you can contribute, beyond writing or coding. The goal of this document is to provide a high-level overview of how you can get involved. 6 | 7 | ## Contributing 8 | 9 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 10 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 11 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 12 | 13 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 14 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 15 | provided by the bot. You will only need to do this once across all repos using our CLA. 16 | 17 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 18 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 19 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 20 | 21 | ## Asking Questions 22 | 23 | Have a question? Open an issue using the question template and the `question` label. 24 | 25 | The active community will be eager to assist you. 
Your well-worded question will serve as a resource to others searching for help. 26 | 27 | ## Providing Feedback 28 | 29 | Your comments and feedback are welcome, and the project team is available via a handful of different channels. 30 | 31 | ## Reporting Issues 32 | 33 | Have you identified a reproducible problem in a workshop? Have a feature request? We want to hear about it! Here's how you can make reporting your issue as effective as possible. 34 | 35 | ## Thank You! 36 | 37 | Your contributions to open source, large or small, make great projects like this possible. Thank you for taking the time to contribute. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # DevSquad In a Day 3 | 4 | Margie's Travel (MT) provides concierge services for business travelers. In an increasingly crowded market, they are always looking for ways to differentiate themselves and provide added value to their corporate customers. 5 | 6 | Recently they've developed a POC for a web app that their internal customer service agents can use to provide additional valuable information to the traveler during the flight booking process. In that POC, they've enabled their agents to enter the flight information and produce a prediction as to whether the departing flight will encounter a 15-minute or longer delay, considering the weather forecast for the departure hour. Now they want to evaluate deploying the project to production, leveraging DataOps & Software Engineering best practices. 7 | 8 | Sept 2021 9 | 10 | ## Target audience 11 | 12 | - Software Engineers 13 | - Data Engineers 14 | - Data Architects 15 | 16 | ## Abstracts 17 | 18 | ### Workshop 19 | 20 | In this workshop, you will deploy a DataOps reference architecture to understand best practices of Data Engineering & Software Engineering combined. 21 | 22 | !['Solution Architecture'](./hands-on-lab/media/high-level-overview-dataops.png) 23 | 24 | ### Lab Instructions 25 | 26 | 1. Follow these [quickstart instructions](./quickstart/README.md) to set up your lab environment before starting the Hands-On Lab.
27 | 28 | 2. Follow the [Hands-On Lab instructions](./hands-on-lab/HOL%20step-by-step%20-%20DevSquad%20in%20a%20Day.md#). 29 | 30 | ## Azure services and related products 31 | 32 | - Azure DevOps 33 | - Azure Databricks 34 | - Azure Data Factory 35 | - Azure Data Lake Storage Gen2 36 | 37 | 38 | ## Help & Support 39 | 40 | We welcome feedback and comments from Microsoft SMEs & learning partners who deliver this workshop. 41 | 42 | ***Having trouble?*** 43 | 44 | - First, verify you have followed all written lab instructions (including the quickstart documents). 45 | - Next, submit an issue with a detailed description of the problem. 46 | - Do not submit pull requests. Our content authors will make all changes and submit pull requests for approval. 47 | 48 | If you are planning to present a workshop, *review and test the materials early*! We recommend at least two weeks prior. 49 | 50 | ### Please allow 5 - 10 business days for review and resolution of issues. 51 | 52 | ## Trademarks 53 | 54 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 55 | trademarks or logos is subject to and must follow 56 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 57 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 58 | Any use of third-party trademarks or logos are subject to those third-party's policies. 59 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. 
buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /azure-pipelines/adf/adf-cd.yml: -------------------------------------------------------------------------------- 1 | # Sample YAML file to validate and export an ARM template into a build artifact 2 | # Requires a package.json file located in the target repository 3 | 4 | trigger: 5 | branches: 6 | include: 7 | - main 8 | - qa 9 | - develop 10 | paths: 11 | include: 12 | - data-platform/adf 13 | 14 | pool: 15 | vmImage: 'ubuntu-latest' 16 | 17 | variables: 18 | - template: ../variable.environment.yml 19 | 20 | stages: 21 | - template: templates/build.yml 22 | parameters: 23 | environment: ${{variables.environment}} 24 | iacCdVariableGroupPrefix: ${{variables.iacCdVariableGroupPrefix}} 25 | iacCdDbwVariableGroupPrefix: ${{variables.iacCdDbwVariableGroupPrefix}} 26 | workingDirectory: 'data-platform/adf' 27 | 28 | - template: templates/deploy.yml 29 | parameters: 30 | environment: ${{variables.environment}} 31 | iacCdVariableGroupPrefix: ${{variables.iacCdVariableGroupPrefix}} 32 | iacCdDbwVariableGroupPrefix: ${{variables.iacCdDbwVariableGroupPrefix}} 33 | pipelineArtifactDirectory: $(System.DefaultWorkingDirectory)/_dataops-adf-cd 34 | 35 | - template: templates/test.yml 36 | parameters: 37 | environment: ${{variables.environment}} 38 | iacCdVariableGroupPrefix: ${{variables.iacCdVariableGroupPrefix}} 39 | iacCdDbwVariableGroupPrefix: ${{variables.iacCdDbwVariableGroupPrefix}} 40 | workingDirectory: data-platform/src/bdd-adf-pipelines 41 | testResultsDirectory: $(System.DefaultWorkingDirectory)/results 42 | -------------------------------------------------------------------------------- /azure-pipelines/adf/templates/build.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: environment 3 | type: string 4 | - name: iacCdVariableGroupPrefix 5 | type: string 6 | - name: iacCdDbwVariableGroupPrefix 7 | type: string 8 | - name: workingDirectory 9 | type: string 10 | 11 | stages: 12 | - stage: build_adf 13 | displayName: "Build the ARM Template" 14 | variables: 15 | - group: '${{ parameters.iacCdVariableGroupPrefix }}-${{ parameters.environment }}' 16 | - group: ${{ parameters.iacCdDbwVariableGroupPrefix }}-${{ parameters.environment }} 17 | jobs: 18 | - job: build 19 | displayName: "Build Data Factory resources" 20 | steps: 21 | - task: AzureCLI@1 22 | displayName: "SETUP: Set Azure Credentials" 23 | inputs: 24 | azureSubscription: 
spn-iac-${{ parameters.environment }} 25 | scriptLocation: inlineScript 26 | inlineScript: | 27 | set -eu 28 | subscriptionId=$(az account show --query id -o tsv) 29 | echo "##vso[task.setvariable variable=SUBSCRIPTION_ID]$subscriptionId" 30 | addSpnToEnvironment: true 31 | 32 | # Installs Node and the npm packages saved in your package.json file in the build 33 | - task: NodeTool@0 34 | inputs: 35 | versionSpec: '10.x' 36 | displayName: 'Install Node.js' 37 | - task: Npm@1 38 | inputs: 39 | command: 'install' 40 | workingDir: ${{ parameters.workingDirectory }} 41 | verbose: true 42 | displayName: 'Install npm package' 43 | 44 | # Validates all of the Data Factory resources in the repository. You'll get the same validation errors as when "Validate All" is selected. 45 | # Enter the appropriate subscription and name for the source factory. 46 | - task: Npm@1 47 | inputs: 48 | command: 'custom' 49 | workingDir: ${{ parameters.workingDirectory }} 50 | customCommand: 'run build validate $(Build.Repository.LocalPath)/${{ parameters.workingDirectory }} /subscriptions/$(SUBSCRIPTION_ID)/resourceGroups/$(resourceGroupCompute)/providers/Microsoft.DataFactory/factories/$(dataFactoryName)' 51 | displayName: 'Validate' 52 | 53 | # Validate and then generate the ARM template into the destination folder, which is the same as selecting "Publish" from the UX. 54 | # The ARM template generated isn't published to the live version of the factory. Deployment should be done by using a CI/CD pipeline. 55 | - task: Npm@1 56 | inputs: 57 | command: 'custom' 58 | workingDir: ${{ parameters.workingDirectory }} 59 | customCommand: 'run build export $(Build.Repository.LocalPath)/${{ parameters.workingDirectory }} /subscriptions/$(SUBSCRIPTION_ID)/resourceGroups/$(resourceGroupCompute)/providers/Microsoft.DataFactory/factories/$(dataFactoryName) "../../ArmTemplates"' 60 | displayName: 'Validate and Generate ARM template' 61 | 62 | # Publish the artifact to be used as a source for a release pipeline. 
63 | - task: PublishPipelineArtifact@1 64 | inputs: 65 | targetPath: '$(Build.Repository.LocalPath)/ArmTemplates' 66 | artifact: 'ArmTemplates' 67 | publishLocation: 'pipeline' 68 | -------------------------------------------------------------------------------- /azure-pipelines/adf/templates/deploy.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: environment 3 | type: string 4 | - name: iacCdVariableGroupPrefix 5 | type: string 6 | - name: iacCdDbwVariableGroupPrefix 7 | type: string 8 | - name: pipelineArtifactDirectory 9 | type: string 10 | 11 | stages: 12 | - stage: deploy_adf 13 | dependsOn: build_adf 14 | displayName: "Deploy Data Factory to subscription" 15 | variables: 16 | - group: ${{ parameters.iacCdVariableGroupPrefix }}-${{ parameters.environment }} 17 | - group: ${{ parameters.iacCdDbwVariableGroupPrefix }}-${{ parameters.environment }} 18 | jobs: 19 | - job: deploy 20 | steps: 21 | - task: AzureCLI@1 22 | displayName: "SETUP: Set Azure Credentials" 23 | inputs: 24 | azureSubscription: spn-iac-${{ parameters.environment }} 25 | scriptLocation: inlineScript 26 | inlineScript: | 27 | set -eu 28 | subscriptionId=$(az account show --query id -o tsv) 29 | echo "##vso[task.setvariable variable=SUBSCRIPTION_ID]$subscriptionId" 30 | addSpnToEnvironment: true 31 | - task: DownloadPipelineArtifact@2 32 | inputs: 33 | artifact: ArmTemplates 34 | path: ${{ parameters.pipelineArtifactDirectory }} 35 | - task: AzurePowerShell@4 36 | displayName: 'Pre-deployment' 37 | inputs: 38 | azureSubscription: spn-iac-${{ parameters.environment }} 39 | ScriptPath: '${{ parameters.pipelineArtifactDirectory }}/PrePostDeploymentScript.ps1' 40 | ScriptArguments: '-predeployment $True -armTemplate "${{ parameters.pipelineArtifactDirectory }}/ARMTemplateForFactory.json" -ResourceGroupName "$(resourceGroupCompute)" -DataFactoryName "$(dataFactoryName)"' 41 | azurePowerShellVersion: LatestVersion 42 | 43 | - task: AzureResourceGroupDeployment@2 44 | displayName: 'Deploy to Azure Subscription' 45 | inputs: 46 | azureSubscription: spn-iac-${{ parameters.environment }} 47 | resourceGroupName: '$(resourceGroupCompute)' 48 | location: '$(location)' 49 | csmFile: '${{ parameters.pipelineArtifactDirectory }}/ARMTemplateForFactory.json' 50 | csmParametersFile: '${{ parameters.pipelineArtifactDirectory }}/ARMTemplateParametersForFactory.json' 51 | overrideParameters: 52 | -factoryName "$(dataFactoryName)" 53 | -AzureDatabricksLinkedService_properties_typeProperties_domain "https://$(databricksWorkspaceUrl)" 54 | -AzureDatabricksLinkedService_properties_typeProperties_existingClusterId "$(interactive-cluster)" 55 | -AzureDatabricksLinkedService_properties_typeProperties_workspaceResourceId "/subscriptions/$(SUBSCRIPTION_ID)/resourceGroups/$(resourceGroupCompute)/providers/Microsoft.Databricks/workspaces/$(databricksName)" 56 | -ADLSLinkedService_properties_typeProperties_url "https://$(dataLakeName).dfs.core.windows.net" 57 | -AzureKeyVaultLinkedService_properties_typeProperties_baseUrl "https://$(keyVaultName).vault.azure.net/" 58 | deploymentMode: Incremental 59 | - task: AzurePowerShell@4 60 | displayName: 'Post-deployment' 61 | inputs: 62 | azureSubscription: spn-iac-${{ parameters.environment }} 63 | ScriptPath: '${{ parameters.pipelineArtifactDirectory }}/PrePostDeploymentScript.ps1' 64 | ScriptArguments: '-predeployment $False -armTemplate "${{ parameters.pipelineArtifactDirectory }}/ARMTemplateForFactory.json" -ResourceGroupName 
"$(resourceGroupCompute)" -DataFactoryName "$(dataFactoryName)"' 65 | azurePowerShellVersion: LatestVersion 66 | -------------------------------------------------------------------------------- /azure-pipelines/adf/templates/test.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: environment 3 | type: string 4 | - name: iacCdVariableGroupPrefix 5 | type: string 6 | - name: iacCdDbwVariableGroupPrefix 7 | type: string 8 | - name: workingDirectory 9 | type: string 10 | - name: testResultsDirectory 11 | type: string 12 | 13 | stages: 14 | - stage: behavior_tests 15 | dependsOn: deploy_adf 16 | condition: and(succeeded(), ne(variables['Build.SourceBranchName'], 'main')) 17 | displayName: "Run behavior tests" 18 | variables: 19 | - group: ${{ parameters.iacCdVariableGroupPrefix }}-${{ parameters.environment }} 20 | - group: ${{ parameters.iacCdDbwVariableGroupPrefix }}-${{ parameters.environment }} 21 | jobs: 22 | - job: adf_pipelines_tests 23 | displayName: "Run behavior tests" 24 | steps: 25 | - task: AzureCLI@1 26 | displayName: "SETUP: Set Azure Credentials" 27 | inputs: 28 | azureSubscription: spn-iac-${{ parameters.environment }} 29 | scriptLocation: inlineScript 30 | inlineScript: | 31 | set -eu 32 | 33 | subscriptionId=$(az account show --query id -o tsv) 34 | accountKey=$(az storage account keys list \ 35 | -n $(dataLakeName) \ 36 | -g $(resourceGroupData) \ 37 | --query [0].value -o tsv) 38 | 39 | echo "##vso[task.setvariable variable=CLIENT_ID]$servicePrincipalId" 40 | echo "##vso[task.setvariable variable=CLIENT_SECRET;issecret=true]$servicePrincipalKey" 41 | echo "##vso[task.setvariable variable=SUBSCRIPTION_ID]$subscriptionId" 42 | echo "##vso[task.setvariable variable=TENANT_ID]$tenantId" 43 | echo "##vso[task.setvariable variable=STORAGE_ACCOUNT_KEY;issecret=true]$accountKey" 44 | addSpnToEnvironment: true 45 | 46 | - task: UsePythonVersion@0 47 | inputs: 48 | versionSpec: "3.7" 49 | 50 | - script: pip install -r requirements.txt 51 | displayName: "SETUP: Install all dependencies" 52 | workingDirectory: ${{ parameters.workingDirectory }} 53 | 54 | - script: | 55 | export CLIENT_ID=$(CLIENT_ID) 56 | export CLIENT_SECRET=${CLIENT_SECRET} 57 | export SUBSCRIPTION_ID=$(SUBSCRIPTION_ID) 58 | export TENANT_ID=$(TENANT_ID) 59 | export ADF_NAME=$(dataFactoryName) 60 | export RESOURCE_GROUP_NAME=$(resourceGroupCompute) 61 | export STORAGE_ACCOUNT_NAME=$(dataLakeName) 62 | export STORAGE_ACCOUNT_KEY=${STORAGE_ACCOUNT_KEY} 63 | 64 | BEHAVE_RESULTS=${{ parameters.testResultsDirectory }}/behave_results.json 65 | behave --format=json -o $BEHAVE_RESULTS 66 | displayName: "TEST: Run behave features" 67 | workingDirectory: ${{ parameters.workingDirectory }} 68 | env: 69 | CLIENT_SECRET: $(CLIENT_SECRET) 70 | STORAGE_ACCOUNT_KEY: $(STORAGE_ACCOUNT_KEY) 71 | 72 | - script: | 73 | BEHAVE_RESULTS=${{ parameters.testResultsDirectory }}/behave_results.json 74 | JUNIT_RESULTS=${{ parameters.testResultsDirectory }}/output.xml 75 | python3 scripts/junit_converter.py $BEHAVE_RESULTS $JUNIT_RESULTS 76 | displayName: 'RESULTS: Convert behavior test results to JUnit format' 77 | condition: succeededOrFailed() 78 | workingDirectory: ${{ parameters.workingDirectory }} 79 | 80 | - task: PublishTestResults@2 81 | inputs: 82 | testResultsFormat: 'JUnit' 83 | testResultsFiles: '${{ parameters.testResultsDirectory }}/output.xml' 84 | failTaskOnFailedTests: true 85 | condition: succeededOrFailed() 86 | displayName: 'RESULTS: Publish behavior test 
results' 87 | 88 | - publish: ${{ parameters.testResultsDirectory }} 89 | artifact: BDDTestResults 90 | condition: succeededOrFailed() 91 | displayName: 'RESULTS: Publish artifacts' 92 | -------------------------------------------------------------------------------- /azure-pipelines/databricks/databricks-ci.yml: -------------------------------------------------------------------------------- 1 | trigger: none 2 | 3 | variables: 4 | NOTEBOOK_WORKING_DIR: $(System.DefaultWorkingDirectory)/data-platform/notebooks 5 | 6 | pool: 7 | vmImage: 'ubuntu-latest' 8 | 9 | jobs: 10 | - job: 'validate_notebooks' 11 | displayName: 'Validate Databricks Notebooks' 12 | steps: 13 | - task: UsePythonVersion@0 14 | inputs: 15 | versionSpec: '3.9' 16 | addToPath: true 17 | architecture: 'x64' 18 | displayName: 'Use Python Version: 3.9' 19 | 20 | - script: | 21 | python -m pip install --upgrade pip 22 | pip install flake8 23 | displayName: 'Setup Agent' 24 | 25 | - script: | 26 | echo 'Working directory: $(NOTEBOOK_WORKING_DIR)' 27 | flake8 ./ 28 | displayName: 'Validating notebook with flake8' 29 | workingDirectory: $(NOTEBOOK_WORKING_DIR) -------------------------------------------------------------------------------- /azure-pipelines/databricks/databricks-lib-cd.template.yml: -------------------------------------------------------------------------------- 1 | trigger: none 2 | 3 | resources: 4 | pipelines: 5 | - pipeline: lib 6 | source: -lib-cd 7 | trigger: 8 | branches: 9 | - develop 10 | - qa 11 | - main 12 | 13 | variables: 14 | - template: ../variable.environment.yml 15 | 16 | stages: 17 | - template: templates/databricks-deploy-library-job-template.yml 18 | parameters: 19 | branch: ${{variables.branch}} 20 | environment: ${{variables.environment}} 21 | azureServiceConnection: ${{variables.azureServiceConnection}} 22 | -------------------------------------------------------------------------------- /azure-pipelines/databricks/databricks-lib-cd.yml: -------------------------------------------------------------------------------- 1 | trigger: none 2 | 3 | resources: 4 | pipelines: 5 | - pipeline: lib 6 | source: -lib-cd 7 | trigger: 8 | branches: 9 | - develop 10 | - qa 11 | - main 12 | 13 | variables: 14 | - template: ../variable.environment.yml 15 | 16 | stages: 17 | - template: templates/databricks-deploy-library-job-template.yml 18 | parameters: 19 | branch: ${{variables.branch}} 20 | environment: ${{variables.environment}} 21 | azureServiceConnection: ${{variables.azureServiceConnection}} 22 | -------------------------------------------------------------------------------- /azure-pipelines/databricks/databricks-notebooks-cd.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | branches: 3 | include: 4 | - qa 5 | - main 6 | - develop 7 | 8 | paths: 9 | include: 10 | - data-platform/notebooks/* 11 | 12 | variables: 13 | - template: ../variable.environment.yml 14 | 15 | stages: 16 | - template: templates/databricks-deploy-notebooks-job-template.yml 17 | parameters: 18 | environment: ${{variables.environment}} 19 | azureServiceConnection: ${{variables.azureServiceConnection}} 20 | iacCdVariableGroupPrefix: ${{variables.iacCdVariableGroupPrefix}} -------------------------------------------------------------------------------- /azure-pipelines/databricks/templates/databricks-auth-step-template.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: azureServiceConnection 3 | type: string 4 | 5 | 
steps: 6 | 7 | - task: AzureCLI@1 8 | displayName: Get Databricks token 9 | inputs: 10 | azureSubscription: ${{ parameters.azureServiceConnection }} 11 | scriptLocation: inlineScript 12 | inlineScript: | 13 | databricks_resource_id="2ff814a6-3304-4ab8-85cb-cd0e6f879c1d" # This is the official databricks resource id 14 | accessToken=$(curl -X GET -H 'Content-Type: application/x-www-form-urlencoded' \ 15 | -d "grant_type=client_credentials&client_id=$servicePrincipalId&resource=$databricks_resource_id&client_secret=$servicePrincipalKey" \ 16 | https://login.microsoftonline.com/$tenantId/oauth2/token \ 17 | | jq -r .access_token) 18 | echo "##vso[task.setvariable variable=DATABRICKS_TOKEN;issecret=true]$accessToken" 19 | addSpnToEnvironment: true -------------------------------------------------------------------------------- /azure-pipelines/databricks/templates/databricks-deploy-library-job-template.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: environment 3 | type: string 4 | - name: azureServiceConnection 5 | type: string 6 | - name: branch 7 | type: string 8 | 9 | stages: 10 | - stage: publish_library_${{ parameters.environment }} 11 | displayName: 'Deploy to ${{ parameters.environment }} Databricks' 12 | jobs: 13 | - deployment: publish_library_${{ parameters.environment }} 14 | displayName: 'Deploy to ${{ parameters.environment }} Databricks' 15 | pool: 16 | vmImage: 'ubuntu-latest' 17 | environment: databricks-${{ parameters.environment }} 18 | variables: 19 | - group: dataops-iac-cd-output-${{ parameters.environment }} 20 | strategy: 21 | runOnce: 22 | deploy: 23 | steps: 24 | - task: DownloadPackage@1 25 | inputs: 26 | packageType: 'pypi' 27 | feed: '$(System.TeamProjectId)/lib-packages' 28 | definition: 'dataopslib' 29 | version: 'latest' 30 | downloadPath: '$(System.ArtifactsDirectory)/lib-library' 31 | - template: ./databricks-setup-environment-template.yml 32 | - template: ./databricks-auth-step-template.yml 33 | parameters: 34 | azureServiceConnection: ${{ parameters.azureServiceConnection }} 35 | - script: | 36 | echo "DBFS URL ${DATABRICKS_HOST}" 37 | echo "Move previous versions of the library in cluster from ${DATABRICKS_LIB_PATH}/new_version/ ${DATABRICKS_LIB_PATH}/old_version" 38 | databricks fs mkdirs "${DATABRICKS_LIB_PATH}/old_version" 39 | databricks fs cp --overwrite -r "${DATABRICKS_LIB_PATH}/new_version" "${DATABRICKS_LIB_PATH}/old_version" 40 | databricks fs rm -r "${DATABRICKS_LIB_PATH}/new_version" 41 | databricks fs mkdirs "${DATABRICKS_LIB_PATH}/new_version" 42 | echo "Copy library in cluster from ${PYPI_REPO} to ${DATABRICKS_LIB_PATH}/new_version" 43 | databricks fs cp --overwrite -r "${PYPI_REPO}" "${DATABRICKS_LIB_PATH}/new_version" 44 | env: 45 | DATABRICKS_HOST: https://$(databricksWorkspaceUrl) 46 | DATABRICKS_TOKEN: $(DATABRICKS_TOKEN) 47 | PYPI_REPO: $(System.ArtifactsDirectory)/lib-library 48 | DATABRICKS_LIB_PATH: dbfs:/FileStore/pypi-libs 49 | displayName: 'Copy and install python library' 50 | -------------------------------------------------------------------------------- /azure-pipelines/databricks/templates/databricks-deploy-notebooks-job-template.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: environment 3 | type: string 4 | - name: azureServiceConnection 5 | type: string 6 | - name: iacCdVariableGroupPrefix 7 | type: string 8 | 9 | stages: 10 | - stage: publish_static_artifacts_${{ parameters.environment }} 11 
| displayName: 'Deploy to ${{ parameters.environment }} Databricks' 12 | jobs: 13 | - deployment: publish_static_artifacts_${{ parameters.environment }} 14 | displayName: 'Deploy to ${{ parameters.environment }} Databricks' 15 | pool: 16 | vmImage: 'ubuntu-latest' 17 | environment: databricks-${{ parameters.environment }} 18 | variables: 19 | - group: ${{ parameters.iacCdVariableGroupPrefix }}-${{ parameters.environment }} 20 | strategy: 21 | runOnce: 22 | deploy: 23 | steps: 24 | - checkout: self 25 | - task: PublishBuildArtifacts@1 26 | inputs: 27 | PathtoPublish: data-platform/notebooks 28 | ArtifactName: 'databricks-${{ parameters.environment }}' 29 | displayName: 'Publish Databricks Artifacts' 30 | - template: ./databricks-setup-environment-template.yml 31 | - template: ./databricks-auth-step-template.yml 32 | parameters: 33 | azureServiceConnection: ${{ parameters.azureServiceConnection }} 34 | - script: | 35 | echo "Uploading notebooks at ${NOTEBOOKS_PATH} to workspace (${DATABRICKS_NOTEBOOK_PATH})..." 36 | databricks workspace import_dir --overwrite "${NOTEBOOKS_PATH}" "${DATABRICKS_NOTEBOOK_PATH}" 37 | env: 38 | DATABRICKS_HOST: https://$(databricksWorkspaceUrl) 39 | DATABRICKS_TOKEN: $(DATABRICKS_TOKEN) 40 | NOTEBOOKS_PATH: $(Pipeline.Workspace)/s/data-platform/notebooks 41 | DATABRICKS_NOTEBOOK_PATH: '/' 42 | displayName: 'Deploy notebooks' 43 | -------------------------------------------------------------------------------- /azure-pipelines/databricks/templates/databricks-setup-environment-template.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | 3 | - task: UsePythonVersion@0 4 | inputs: 5 | versionSpec: '3.9' 6 | addToPath: true 7 | architecture: 'x64' 8 | displayName: 'Use Python Version: 3.9' 9 | 10 | - script: | 11 | echo "Downloading Databricks CLI last version..." 12 | curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh 13 | chmod +x databricks 14 | sudo mv databricks /usr/local/bin/databricks 15 | 16 | echo "Verifying Databricks CLI version..." 
17 | databricks version 18 | displayName: 'Install Databricks CLI last version (Binary)' -------------------------------------------------------------------------------- /azure-pipelines/iac/iac-cd.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | branches: 3 | include: 4 | - main 5 | - qa 6 | - develop 7 | paths: 8 | include: 9 | - infrastructure-as-code/databricks 10 | - infrastructure-as-code/infrastructure 11 | 12 | variables: 13 | - template: ../variable.environment.yml 14 | 15 | stages: 16 | - template: templates/stage.plan.yml 17 | parameters: 18 | environment: ${{variables.environment}} 19 | azureServiceConnection: ${{variables.azureServiceConnection}} 20 | location: ${{variables.location}} 21 | solutionName: ${{variables.solutionName}} 22 | 23 | - template: templates/stage.deploy.yml 24 | parameters: 25 | environment: ${{variables.environment}} 26 | azureServiceConnection: ${{variables.azureServiceConnection}} 27 | location: ${{variables.location}} 28 | solutionName: ${{variables.solutionName}} 29 | 30 | - template: templates/stage.acceptance-test.yml 31 | parameters: 32 | environment: ${{variables.environment}} 33 | azureServiceConnection: ${{variables.azureServiceConnection}} 34 | solutionName: ${{variables.solutionName}} 35 | -------------------------------------------------------------------------------- /azure-pipelines/iac/iac-ci.yml: -------------------------------------------------------------------------------- 1 | trigger: none 2 | 3 | stages: 4 | - template: templates/stage.validate.yml -------------------------------------------------------------------------------- /azure-pipelines/iac/templates/stage.acceptance-test.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: environment 3 | type: string 4 | - name: azureServiceConnection 5 | type: string 6 | - name: solutionName 7 | type: string 8 | 9 | stages: 10 | - stage: test 11 | displayName: 'Acceptance Test for ${{ parameters.environment }}' 12 | jobs: 13 | - job: test 14 | displayName: 'Acceptance Test for ${{ parameters.environment }}' 15 | pool: 16 | vmImage: 'ubuntu-latest' 17 | variables: 18 | azPowershellVersion: 7.5.0 19 | steps: 20 | - task: AzurePowerShell@5 21 | displayName: 'Acceptance Test' 22 | inputs: 23 | azureSubscription: ${{ parameters.azureServiceConnection }} 24 | scriptType: filePath 25 | scriptPath: $(Build.SourcesDirectory)/infrastructure-as-code/scripts/AcceptanceTest.ps1 26 | scriptArguments: > 27 | -AzureDevOpsPAT "$(System.AccessToken)" 28 | -AzureDevOpsOrganization $(System.TeamFoundationCollectionUri) 29 | -AzureDevOpsProject "$(System.TeamProject)" 30 | -SolutionName "${{ parameters.solutionName }}" 31 | -Environment ${{ parameters.environment }} 32 | azurePowerShellVersion: 'OtherVersion' 33 | preferredAzurePowerShellVersion: $(azPowershellVersion) 34 | env: 35 | AzureDevOpsPAT: $(System.AccessToken) 36 | - task: PublishTestResults@2 37 | displayName: 'Pester Acceptance Tests' 38 | inputs: 39 | testResultsFormat: 'NUnit' 40 | testResultsFiles: '**/testResults.xml' 41 | failTaskOnFailedTests: true 42 | testRunTitle: 'Pester Acceptance Tests (${{ parameters.environment }})' -------------------------------------------------------------------------------- /azure-pipelines/iac/templates/stage.plan.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: environment 3 | type: string 4 | - name: azureServiceConnection 5 | 
type: string 6 | - name: location 7 | type: string 8 | - name: solutionName 9 | type: string 10 | 11 | stages: 12 | - stage: plan 13 | displayName: 'Plan for ${{ parameters.environment }}' 14 | jobs: 15 | - job: plan 16 | displayName: 'Plan for ${{ parameters.environment }}' 17 | pool: 18 | vmImage: 'ubuntu-latest' 19 | variables: 20 | azPowershellVersion: 7.5.0 21 | steps: 22 | - template: step.install-gitversion.yml 23 | parameters: 24 | gitversionConfigFile: $(Build.SourcesDirectory)/infrastructure-as-code/GitVersion.yml 25 | - task: AzurePowerShell@5 26 | displayName: 'Plan' 27 | inputs: 28 | azureSubscription: ${{ parameters.azureServiceConnection }} 29 | scriptType: filePath 30 | scriptPath: infrastructure-as-code/scripts/Plan.ps1 31 | scriptArguments: > 32 | -Environment ${{ parameters.environment }} 33 | -Location "${{ parameters.location }}" 34 | -SolutionName "${{ parameters.solutionName }}" 35 | -Version $(GitVersion.SemVer) 36 | -VersionDescription "$(Build.SourceVersionMessage)" 37 | -VersionBuildId $(Build.BuildId) 38 | -VersionAuthor $(Build.RequestedForEmail) 39 | azurePowerShellVersion: 'OtherVersion' 40 | preferredAzurePowerShellVersion: $(azPowershellVersion) 41 | -------------------------------------------------------------------------------- /azure-pipelines/iac/templates/stage.validate.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - stage: validate 3 | displayName: 'Validate' 4 | jobs: 5 | - job: lint 6 | displayName: 'Lint' 7 | pool: 8 | vmImage: 'ubuntu-latest' 9 | steps: 10 | - template: step.install-arm-template-toolkit.yml 11 | parameters: 12 | ttkFolder: ./ttk 13 | - task: PowerShell@2 14 | displayName: Run ARM Template Test Toolkit 15 | inputs: 16 | pwsh: true 17 | targetType: 'filePath' 18 | filePath: infrastructure-as-code/scripts/Lint.ps1 19 | arguments: > 20 | -TtkFolder "./ttk" 21 | -------------------------------------------------------------------------------- /azure-pipelines/iac/templates/step.install-arm-template-toolkit.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: ttkFolder 3 | type: string 4 | - name: ttkUri 5 | type: string 6 | default: https://aka.ms/arm-ttk-latest 7 | 8 | steps: 9 | - pwsh: | 10 | New-Item '${{ parameters.ttkFolder }}' -ItemType Directory 11 | Invoke-WebRequest -Uri '${{ parameters.ttkUri }}' -OutFile '${{ parameters.ttkFolder }}/arm-ttk.zip' -Verbose 12 | Expand-Archive -Path '${{ parameters.ttkFolder }}/*.zip' -DestinationPath '${{ parameters.ttkFolder }}' -Verbose 13 | displayName: 'Install ARM Template Test Toolkit' -------------------------------------------------------------------------------- /azure-pipelines/iac/templates/step.install-azure-powershell.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: azPowershellVersion 3 | type: string 4 | default: 7.5.0 5 | 6 | steps: 7 | - pwsh: Save-Module -Path /usr/share/az_${{ parameters.azPowershellVersion }} -Name Az -RequiredVersion ${{ parameters.azPowershellVersion }} -Force -ErrorAction Stop -Verbose 8 | displayName: 'Install Azure Powershell ${{ parameters.azPowershellVersion }}' 9 | -------------------------------------------------------------------------------- /azure-pipelines/iac/templates/step.install-databricks-cli.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: azureServiceConnection 3 | type: string 4 | 5 | steps: 6 |
- task: UsePythonVersion@0 7 | inputs: 8 | versionSpec: 3.9 9 | addToPath: true 10 | architecture: 'x64' 11 | displayName: 'Use Python Version: 3.9' 12 | 13 | - script: | 14 | # Use the latest stable version from GitHub releases 15 | CLI_VERSION="0.252.0" 16 | 17 | echo "Downloading Databricks CLI v${CLI_VERSION} (v2.x)..." 18 | 19 | # Download with proper error handling 20 | curl -fsSL --output databricks.tar.gz "https://github.com/databricks/cli/releases/download/v${CLI_VERSION}/databricks_cli_${CLI_VERSION}_linux_amd64.tar.gz" 21 | 22 | if [ $? -ne 0 ]; then 23 | echo "##vso[task.logissue type=error]Failed to download Databricks CLI" 24 | exit 1 25 | fi 26 | 27 | # Extract and install 28 | tar -xzf databricks.tar.gz 29 | chmod +x databricks 30 | sudo mv databricks /usr/local/bin/ 31 | 32 | # Verify installation 33 | echo "Verifying Databricks CLI version..." 34 | databricks version 35 | 36 | if [ $? -ne 0 ]; then 37 | echo "##vso[task.logissue type=error]Databricks CLI installation failed" 38 | exit 1 39 | fi 40 | displayName: 'Install Databricks CLI v2.x (binary)' 41 | 42 | - script: | 43 | echo "Configuring Databricks CLI authentication..." 44 | 45 | # Create config directory if it doesn't exist 46 | mkdir -p ~/.databricks 47 | 48 | # Create config file with service principal credentials 49 | cat > ~/.databrickscfg << EOF 50 | [DEFAULT] 51 | host = https://$(databricksWorkspaceUrl) 52 | azure_client_id = $(ARM_CLIENT_ID) 53 | azure_client_secret = $(ARM_CLIENT_SECRET) 54 | azure_tenant_id = $(ARM_TENANT_ID) 55 | azure_use_msi = false 56 | EOF 57 | 58 | # Test authentication 59 | databricks workspace list / 60 | 61 | if [ $? -ne 0 ]; then 62 | echo "##vso[task.logissue type=error]Databricks CLI authentication failed" 63 | exit 1 64 | fi 65 | 66 | echo "Authentication successful!" 
67 | displayName: 'Authenticate with Databricks CLI' -------------------------------------------------------------------------------- /azure-pipelines/iac/templates/step.install-gitversion.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: gitversionVersion 3 | type: string 4 | default: 6.2.x 5 | - name: gitversionConfigFile 6 | type: string 7 | 8 | steps: 9 | - task: gitversion/setup@3.2.0 10 | displayName: 'Install GitVersion' 11 | inputs: 12 | versionSpec: '${{ parameters.gitversionVersion }}' 13 | 14 | - task: gitversion/execute@3.2.0 15 | displayName: 'Execute GitVersion' 16 | inputs: 17 | useConfigFile: true 18 | configFilePath: '${{ parameters.gitversionConfigFile }}' 19 | -------------------------------------------------------------------------------- /azure-pipelines/lib/lib-cd.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | batch: true 3 | branches: 4 | include: 5 | - develop 6 | - qa 7 | - main 8 | paths: 9 | include: 10 | - data-platform/src/dataopslib/dataopslib 11 | 12 | variables: 13 | - template: ../variable.environment.yml 14 | 15 | stages: 16 | - template: templates/lib-stage-environment.yml 17 | parameters: 18 | branch: 'refs/heads/develop' 19 | environment: dev 20 | prereleaseAlias: beta 21 | variableGroupName: ${{variables.variableGroupName}} 22 | artifactFeed: ${{variables.artifactFeed}} 23 | 24 | - template: templates/lib-stage-environment.yml 25 | parameters: 26 | branch: 'refs/heads/qa' 27 | environment: qa 28 | prereleaseAlias: preview 29 | variableGroupName: ${{variables.variableGroupName}} 30 | artifactFeed: ${{variables.artifactFeed}} 31 | 32 | - template: templates/lib-stage-environment.yml 33 | parameters: 34 | branch: 'refs/heads/main' 35 | environment: prod 36 | variableGroupName: ${{variables.variableGroupName}} 37 | artifactFeed: ${{variables.artifactFeed}} 38 | -------------------------------------------------------------------------------- /azure-pipelines/lib/lib-ci.yml: -------------------------------------------------------------------------------- 1 | trigger: none 2 | 3 | pool: 4 | vmImage: "ubuntu-latest" 5 | 6 | variables: 7 | - template: ../variable.environment.yml 8 | 9 | jobs: 10 | - template: templates/lib-build-publish.yml 11 | parameters: 12 | artifactFeed: ${{variables.artifactFeed}} 13 | variableGroupName: ${{variables.variableGroupName}} 14 | prereleaseAlias: alpha 15 | sourceBranchName: $(System.PullRequest.SourceBranch) -------------------------------------------------------------------------------- /azure-pipelines/lib/templates/lib-stage-environment.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: branch 3 | type: string 4 | - name: environment 5 | type: string 6 | - name: libraryDirectory 7 | type: string 8 | default: $(System.DefaultWorkingDirectory)/data-platform/src/dataopslib 9 | - name: artifactFeed 10 | type: string 11 | - name: variableGroupName 12 | type: string 13 | - name: prereleaseAlias 14 | type: string 15 | default: none 16 | values: 17 | - alpha 18 | - beta 19 | - preview 20 | - none 21 | 22 | stages: 23 | - stage: publish_${{ parameters.environment }} 24 | condition: eq(variables['Build.SourceBranch'], '${{ parameters.branch }}') 25 | displayName: 'Package for ${{ parameters.environment }}' 26 | jobs: 27 | - template: lib-build-publish.yml 28 | parameters: 29 | libraryDirectory: ${{ parameters.libraryDirectory }} 30 | artifactFeed: ${{ 
parameters.artifactFeed }} 31 | variableGroupName: ${{ parameters.variableGroupName }} 32 | prereleaseAlias: ${{ parameters.prereleaseAlias }} 33 | sourceBranchName: main 34 | -------------------------------------------------------------------------------- /azure-pipelines/variable.environment.template.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | iacCdVariableGroupPrefix: 'dataops-iac-cd-output' 3 | iacCdDbwVariableGroupPrefix: 'dataops-iac-dbw-cd-output' 4 | artifactFeed: /lib-packages 5 | variableGroupName: lib-versions 6 | location: eastus 7 | solutionName: 8 | ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 9 | environment: prod 10 | azureServiceConnection: spn-iac-prod 11 | branch: 'refs/heads/main' 12 | ${{ if eq(variables['Build.SourceBranchName'], 'qa') }}: 13 | environment: qa 14 | azureServiceConnection: spn-iac-qa 15 | branch: 'refs/heads/qa' 16 | ${{ if and(ne(variables['Build.SourceBranchName'], 'main'), ne(variables['Build.SourceBranchName'], 'qa')) }}: 17 | environment: dev 18 | azureServiceConnection: spn-iac-dev 19 | branch: 'refs/heads/develop' -------------------------------------------------------------------------------- /data-platform/adf/dataset/AirportCodesSink.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "AirportCodesSink", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "ADLSLinkedService", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobFSLocation", 13 | "fileName": "AirportCodeLocationLookupClean.csv", 14 | "folderPath": "airport-metadata", 15 | "fileSystem": "landing" 16 | }, 17 | "columnDelimiter": ",", 18 | "escapeChar": "\\", 19 | "quoteChar": "\"" 20 | }, 21 | "schema": [ 22 | { 23 | "type": "String" 24 | }, 25 | { 26 | "type": "String" 27 | }, 28 | { 29 | "type": "String" 30 | }, 31 | { 32 | "type": "String" 33 | }, 34 | { 35 | "type": "String" 36 | } 37 | ] 38 | } 39 | } -------------------------------------------------------------------------------- /data-platform/adf/dataset/AirportCodesSource.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "AirportCodesSource", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "AzureBlobStorageLinkedService", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobStorageLocation", 13 | "fileName": "AirportCodeLocationLookupClean.csv", 14 | "container": "flights-data" 15 | }, 16 | "columnDelimiter": ",", 17 | "escapeChar": "\\", 18 | "quoteChar": "\"" 19 | }, 20 | "schema": [ 21 | { 22 | "type": "String" 23 | }, 24 | { 25 | "type": "String" 26 | }, 27 | { 28 | "type": "String" 29 | }, 30 | { 31 | "type": "String" 32 | }, 33 | { 34 | "type": "String" 35 | } 36 | ] 37 | } 38 | } -------------------------------------------------------------------------------- /data-platform/adf/dataset/FlightsDelaysSink.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "FlightsDelaysSink", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "ADLSLinkedService", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": 
"AzureBlobFSLocation", 13 | "fileName": "FlightDelaysWithAirportCodes.csv", 14 | "folderPath": "flight-delays", 15 | "fileSystem": "landing" 16 | }, 17 | "columnDelimiter": ",", 18 | "escapeChar": "\\", 19 | "quoteChar": "\"" 20 | }, 21 | "schema": [ 22 | { 23 | "type": "String" 24 | }, 25 | { 26 | "type": "String" 27 | }, 28 | { 29 | "type": "String" 30 | }, 31 | { 32 | "type": "String" 33 | }, 34 | { 35 | "type": "String" 36 | }, 37 | { 38 | "type": "String" 39 | }, 40 | { 41 | "type": "String" 42 | }, 43 | { 44 | "type": "String" 45 | }, 46 | { 47 | "type": "String" 48 | }, 49 | { 50 | "type": "String" 51 | }, 52 | { 53 | "type": "String" 54 | }, 55 | { 56 | "type": "String" 57 | }, 58 | { 59 | "type": "String" 60 | }, 61 | { 62 | "type": "String" 63 | }, 64 | { 65 | "type": "String" 66 | }, 67 | { 68 | "type": "String" 69 | }, 70 | { 71 | "type": "String" 72 | }, 73 | { 74 | "type": "String" 75 | }, 76 | { 77 | "type": "String" 78 | }, 79 | { 80 | "type": "String" 81 | } 82 | ] 83 | } 84 | } -------------------------------------------------------------------------------- /data-platform/adf/dataset/FlightsDelaysSource.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "FlightsDelaysSource", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "AzureBlobStorageLinkedService", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobStorageLocation", 13 | "fileName": "FlightDelaysWithAirportCodes.csv", 14 | "container": "flights-data" 15 | }, 16 | "columnDelimiter": ",", 17 | "escapeChar": "\\", 18 | "quoteChar": "\"" 19 | }, 20 | "schema": [ 21 | { 22 | "type": "String" 23 | }, 24 | { 25 | "type": "String" 26 | }, 27 | { 28 | "type": "String" 29 | }, 30 | { 31 | "type": "String" 32 | }, 33 | { 34 | "type": "String" 35 | }, 36 | { 37 | "type": "String" 38 | }, 39 | { 40 | "type": "String" 41 | }, 42 | { 43 | "type": "String" 44 | }, 45 | { 46 | "type": "String" 47 | }, 48 | { 49 | "type": "String" 50 | }, 51 | { 52 | "type": "String" 53 | }, 54 | { 55 | "type": "String" 56 | }, 57 | { 58 | "type": "String" 59 | }, 60 | { 61 | "type": "String" 62 | }, 63 | { 64 | "type": "String" 65 | }, 66 | { 67 | "type": "String" 68 | }, 69 | { 70 | "type": "String" 71 | }, 72 | { 73 | "type": "String" 74 | }, 75 | { 76 | "type": "String" 77 | }, 78 | { 79 | "type": "String" 80 | } 81 | ] 82 | } 83 | } -------------------------------------------------------------------------------- /data-platform/adf/dataset/FlightsWeatherSink.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "FlightsWeatherSink", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "ADLSLinkedService", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobFSLocation", 13 | "fileName": "FlightWeatherWithAirportCode.csv", 14 | "folderPath": "flight-weather", 15 | "fileSystem": "landing" 16 | }, 17 | "columnDelimiter": ",", 18 | "escapeChar": "\\", 19 | "quoteChar": "\"" 20 | }, 21 | "schema": [ 22 | { 23 | "type": "String" 24 | }, 25 | { 26 | "type": "String" 27 | }, 28 | { 29 | "type": "String" 30 | }, 31 | { 32 | "type": "String" 33 | }, 34 | { 35 | "type": "String" 36 | }, 37 | { 38 | "type": "String" 39 | }, 40 | { 41 | "type": "String" 42 | }, 43 | { 44 | "type": "String" 45 | }, 46 | 
{ 47 | "type": "String" 48 | }, 49 | { 50 | "type": "String" 51 | }, 52 | { 53 | "type": "String" 54 | }, 55 | { 56 | "type": "String" 57 | }, 58 | { 59 | "type": "String" 60 | }, 61 | { 62 | "type": "String" 63 | }, 64 | { 65 | "type": "String" 66 | }, 67 | { 68 | "type": "String" 69 | }, 70 | { 71 | "type": "String" 72 | }, 73 | { 74 | "type": "String" 75 | }, 76 | { 77 | "type": "String" 78 | }, 79 | { 80 | "type": "String" 81 | }, 82 | { 83 | "type": "String" 84 | }, 85 | { 86 | "type": "String" 87 | }, 88 | { 89 | "type": "String" 90 | }, 91 | { 92 | "type": "String" 93 | }, 94 | { 95 | "type": "String" 96 | }, 97 | { 98 | "type": "String" 99 | }, 100 | { 101 | "type": "String" 102 | }, 103 | { 104 | "type": "String" 105 | }, 106 | { 107 | "type": "String" 108 | } 109 | ] 110 | } 111 | } -------------------------------------------------------------------------------- /data-platform/adf/dataset/FlightsWeatherSource.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "FlightsWeatherSource", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "AzureBlobStorageLinkedService", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobStorageLocation", 13 | "fileName": "FlightWeatherWithAirportCode.csv", 14 | "container": "flights-data" 15 | }, 16 | "columnDelimiter": ",", 17 | "escapeChar": "\\", 18 | "quoteChar": "\"" 19 | }, 20 | "schema": [ 21 | { 22 | "type": "String" 23 | }, 24 | { 25 | "type": "String" 26 | }, 27 | { 28 | "type": "String" 29 | }, 30 | { 31 | "type": "String" 32 | }, 33 | { 34 | "type": "String" 35 | }, 36 | { 37 | "type": "String" 38 | }, 39 | { 40 | "type": "String" 41 | }, 42 | { 43 | "type": "String" 44 | }, 45 | { 46 | "type": "String" 47 | }, 48 | { 49 | "type": "String" 50 | }, 51 | { 52 | "type": "String" 53 | }, 54 | { 55 | "type": "String" 56 | }, 57 | { 58 | "type": "String" 59 | }, 60 | { 61 | "type": "String" 62 | }, 63 | { 64 | "type": "String" 65 | }, 66 | { 67 | "type": "String" 68 | }, 69 | { 70 | "type": "String" 71 | }, 72 | { 73 | "type": "String" 74 | }, 75 | { 76 | "type": "String" 77 | }, 78 | { 79 | "type": "String" 80 | }, 81 | { 82 | "type": "String" 83 | }, 84 | { 85 | "type": "String" 86 | }, 87 | { 88 | "type": "String" 89 | }, 90 | { 91 | "type": "String" 92 | }, 93 | { 94 | "type": "String" 95 | }, 96 | { 97 | "type": "String" 98 | }, 99 | { 100 | "type": "String" 101 | }, 102 | { 103 | "type": "String" 104 | }, 105 | { 106 | "type": "String" 107 | } 108 | ] 109 | } 110 | } -------------------------------------------------------------------------------- /data-platform/adf/linkedService/ADLSLinkedService.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ADLSLinkedService", 3 | "properties": { 4 | "annotations": [], 5 | "type": "AzureBlobFS", 6 | "typeProperties": { 7 | "url": "" 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /data-platform/adf/linkedService/AzureBlobStorageLinkedService.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "AzureBlobStorageLinkedService", 3 | "properties": { 4 | "annotations": [], 5 | "type": "AzureBlobStorage", 6 | "typeProperties": { 7 | "connectionString": { 8 | "type": "AzureKeyVaultSecret", 9 | "store": { 10 | "referenceName": "AzureKeyVaultLinkedService", 11 | "type": 
"LinkedServiceReference" 12 | }, 13 | "secretName": "StorageAccountConnectionString" 14 | } 15 | } 16 | } 17 | } -------------------------------------------------------------------------------- /data-platform/adf/linkedService/AzureDatabricksLinkedService.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "AzureDatabricksLinkedService", 3 | "properties": { 4 | "annotations": [], 5 | "type": "AzureDatabricks", 6 | "typeProperties": { 7 | "domain": "", 8 | "authentication": "MSI", 9 | "workspaceResourceId": "", 10 | "existingClusterId": "mycluster" 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /data-platform/adf/linkedService/AzureKeyVaultLinkedService.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "AzureKeyVaultLinkedService", 3 | "properties": { 4 | "annotations": [], 5 | "type": "AzureKeyVault", 6 | "typeProperties": { 7 | "baseUrl": "" 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /data-platform/adf/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts":{ 3 | "build":"node node_modules/@microsoft/azure-data-factory-utilities/lib/index" 4 | }, 5 | "dependencies":{ 6 | "@microsoft/azure-data-factory-utilities":"^0.1.3" 7 | } 8 | } -------------------------------------------------------------------------------- /data-platform/adf/publish_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "publishBranch": "factory/adf_publish" 3 | } -------------------------------------------------------------------------------- /data-platform/notebooks/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E266,E501,E722,F821,E402,F403,F405,W292 -------------------------------------------------------------------------------- /data-platform/notebooks/Shared/DataOps/00 - Temp.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | print("hello world") 3 | -------------------------------------------------------------------------------- /data-platform/notebooks/Shared/DataOps/01 ADLS Mount.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # Databricks notebook source 3 | import os 4 | for adls_zone in ["landing", "refined", "trusted"]: 5 | if not os.path.exists("/dbfs/mnt/{}/".format(adls_zone)): 6 | mount_point = "/mnt/{}".format(adls_zone) 7 | configs = { 8 | "fs.azure.account.auth.type": "OAuth", 9 | "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider", 10 | "fs.azure.account.oauth2.client.id": dbutils.secrets.get(scope="dataops", key="clientId"), 11 | "fs.azure.account.oauth2.client.secret": dbutils.secrets.get(scope="dataops", key="clientSecret"), 12 | "fs.azure.account.oauth2.client.endpoint": "https://login.microsoftonline.com/{}/oauth2/token".format(dbutils.secrets.get(scope="dataops", key="tenantId")), 13 | "fs.azure.createRemoteFileSystemDuringInitialization": "true" 14 | } 15 | dbutils.fs.mount( 16 | source="abfss://{}@{}.dfs.core.windows.net".format(adls_zone, dbutils.secrets.get(scope="dataops", key="dataLakeName")), 17 | mount_point=mount_point, 18 | extra_configs=configs) 19 | 
-------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | /src/.vscode/ 132 | !/src/.vscode/settings.json 133 | !/src/.vscode/tasks.json 134 | !/src/.vscode/launch.json 135 | !/src/.vscode/extensions.json 136 | *.code-workspace 137 | 138 | # Local History for Visual Studio Code 139 | ./src/.history/ 140 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Python: Behave", 6 | "type": "python", 7 | "request": "launch", 8 | "module": "behave", 9 | "console": "integratedTerminal", 10 | "args": [ 11 | "${file}" 12 | ] 13 | }, 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "env/bin/python" 3 | } 4 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/README.md: -------------------------------------------------------------------------------- 1 | # Motivation 2 | 3 | Testing data pipelines has unique challenges that make it different from testing traditional software. You have data pipelines that pull data from many source systems, ensure data quality (i.e. ensure that bad data is identified, then blocked, scrubbed, fixed, or just logged), combine this data, and transform and scrub it. Then the data is stored in some processed state for consumption by downstream systems, analytics platforms, or data scientists. These pipelines often process data from *hundreds* or even *thousands* of sources. You run your pipelines and get *several* million new rows in your consumption layer. 4 | 5 | Then you create full end-to-end functional tests for the pipelines. The pipelines are getting more complex over time, and the tests are becoming harder to understand and maintain. Then you start thinking: 6 | 7 | * How to make the tests as readable as possible? 8 | * How to improve test maintainability? 9 | * *How to effectively communicate the **current behavior** of the data pipelines with the team or across teams?* 10 | 11 | Leveraging the concepts of Behavior-Driven Development could be the answer to these questions. BDD uses **human-readable** descriptions of software user requirements as the basis for software tests, where we define a shared vocabulary between stakeholders, domain experts, and engineers. This process involves the definition of entities, events, and outputs that the users care about, and giving them names that everybody can agree on.
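To make this concrete, a human-readable step such as `Given successful execution of the "ProcessFlightsDelaysData" data pipeline` is bound to a small Python function through behave's step decorators (the real binding lives in `features/steps/common.py`, shown later in this repository). A minimal sketch, with simplified assertion logic:

```python
from behave import given


@given('successful execution of the "{pipeline_name}" data pipeline')
def step_impl(context, pipeline_name):
    # The pipeline runs are triggered once in features/environment.py and their
    # results are stored on the behave context before any scenario executes.
    assert context.pipeline_results.get(pipeline_name) is True
```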
12 | 13 | ## Testing Strategy 14 | 15 | ### Language and Frameworks 16 | 17 | Data engineers and data scientists are turning decisively to Python - according to the [O'Reilly annual usage analysis](https://www.oreilly.com/radar/oreilly-2020-platform-analysis/) - due to its applicability and its tools for data analysis and ML/AI. 18 | 19 | For this reason, the tests in this repository are written in Python using [behave](https://github.com/behave/behave), the most widely used open-source BDD framework. The framework leverages [Gherkin](https://cucumber.io/docs/gherkin/reference/) to write tests, a well-known BDD language designed to be human-readable. 20 | 21 | ### Structure of tests 22 | 23 | Essentially, the test files are structured in two levels: 24 | 25 | * **Features**: Files where we specify the expected behavior of the data pipelines based on the existing requirements, in a way that can be understood by all people involved (e.g. data engineers, data scientists, business analysts). The specifications are written in Gherkin format. 26 | * **Steps**: Files where we implement the scenarios defined in the feature files. These files are written in Python. 27 | 28 | ## Prerequisites 29 | 30 | * An Azure account with an active subscription ([Create one for free](https://azure.microsoft.com/en-us/free/?ref=microsoft.com&utm_source=microsoft.com&utm_medium=docs&utm_campaign=visualstudio) 😁) 31 | * [Python 3.4+](https://www.python.org/downloads/) installed and working in your development environment. Use of Visual Studio Code is recommended. 32 | 33 | ## Getting Started 34 | 35 | If using Visual Studio Code, open a bash terminal. 36 | Navigate to the `src` directory under this current path (data-platform/src) and create a virtual environment: 37 | 38 | ```sh 39 | python3 -m venv env 40 | source env/bin/activate 41 | ``` 42 | 43 | Navigate to the `bdd-adf-pipelines` directory and then install the required packages: 44 | 45 | ```sh 46 | cd bdd-adf-pipelines 47 | pip3 install -r requirements.txt 48 | ``` 49 | 50 | Create the following environment variables using the values for your lab Azure environment: 51 | 52 | ```sh 53 | export CLIENT_ID="" 54 | export CLIENT_SECRET="" 55 | export SUBSCRIPTION_ID="" 56 | export TENANT_ID="" 57 | export ADF_NAME="" 58 | export RESOURCE_GROUP_NAME="" 59 | export STORAGE_ACCOUNT_NAME="" 60 | export STORAGE_ACCOUNT_KEY="" 61 | ``` 62 | 63 | >The storage account was created in Exercise 3; review Task 4 to see its name and get the Account Key from the Portal.
64 | 65 | Then run the following command to start the BDD tests: 66 | 67 | ```sh 68 | behave 69 | ``` 70 | 71 | The result should look similar to the next image: 72 | 73 | ![Behave Results](/lab-files/media/behave-results.png) 74 | 75 | ## References 76 | 77 | * [The challenge of testing Data Pipelines](https://medium.com/slalom-build/the-challenge-of-testing-data-pipelines-4450744a84f1) 78 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/data-platform/src/bdd-adf-pipelines/core/__init__.py -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/core/models/__init__.py: -------------------------------------------------------------------------------- 1 | from core.models.service_principal import ServicePrincipal 2 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/core/models/service_principal.py: -------------------------------------------------------------------------------- 1 | class ServicePrincipal: 2 | def __init__(self, client_id: str, client_secret: str, subscription_id: str, tenant_id: str): 3 | self.client_id = client_id 4 | self.client_secret = client_secret 5 | self.subscription_id = subscription_id 6 | self.tenant_id = tenant_id 7 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/core/services/__init__.py: -------------------------------------------------------------------------------- 1 | from core.services.adls import Adls 2 | from core.services.datafactory import DataFactory 3 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/core/services/adls.py: -------------------------------------------------------------------------------- 1 | from azure.core.paging import ItemPaged 2 | from azure.storage.filedatalake import * 3 | from typing import List 4 | import os 5 | 6 | class Adls: 7 | def __init__(self, storage_account_name: str, storage_account_key: str) -> None: 8 | self.client: DataLakeServiceClient = self.get_client(storage_account_name, storage_account_key) 9 | 10 | def get_client(self, storage_account_name: str, storage_account_key: str) -> DataLakeServiceClient: 11 | account_url: str = f"https://{storage_account_name}.dfs.core.windows.net" 12 | return DataLakeServiceClient(account_url, storage_account_key) 13 | 14 | def container_exists(self, container_name: str) -> bool: 15 | file_system_client: FileSystemClient = self.client.get_file_system_client(container_name) 16 | return file_system_client is not None 17 | 18 | def get_latest_log_content(self, container_name: str, directory_name: str) -> str: 19 | file_system_client: FileSystemClient = self.client.get_file_system_client(container_name) 20 | directory_client: DataLakeDirectoryClient = file_system_client.get_directory_client(directory_name) 21 | log_file_name: str = self.get_latest_log_file_name(file_system_client, directory_name) 22 | 23 | return self.get_log_content(directory_client, log_file_name) 24 | 25 | def get_latest_log_file_name(self, file_system_client: FileSystemClient, directory_name: str) -> str: 26 | paths: ItemPaged = file_system_client.get_paths(directory_name) 27 | 28 
| if not paths: 29 | raise Exception("No log files were found.") 30 | 31 | file_names: List[str] = [path.name for path in paths] 32 | return os.path.basename(file_names[-1]) 33 | 34 | def get_log_content(self, directory_client: DataLakeDirectoryClient, log_file_name: str) -> str: 35 | file_client: DataLakeFileClient = directory_client.get_file_client(log_file_name) 36 | downloader: StorageStreamDownloader = file_client.download_file() 37 | content_bytes: bytes = downloader.readall() 38 | 39 | return content_bytes.decode("utf-8") 40 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/core/services/datafactory.py: -------------------------------------------------------------------------------- 1 | from azure.identity import ClientSecretCredential 2 | from azure.mgmt.datafactory import DataFactoryManagementClient 3 | from azure.mgmt.datafactory.models import ActivityRunsQueryResponse, CreateRunResponse, Factory, PipelineRun, RunFilterParameters 4 | from core.models import ServicePrincipal 5 | from datetime import datetime, timedelta 6 | from typing import Optional 7 | import time 8 | 9 | class DataFactory: 10 | def __init__(self, adf_name: str, resource_group_name: str, service_principal: ServicePrincipal) -> None: 11 | self.factory_name: str = adf_name 12 | self.resource_group_name: str = resource_group_name 13 | self.service_principal: ServicePrincipal = service_principal 14 | self.client: DataFactoryManagementClient = self.get_client() 15 | 16 | def get_client(self) -> DataFactoryManagementClient: 17 | credentials = ClientSecretCredential( 18 | client_id=self.service_principal.client_id, 19 | client_secret=self.service_principal.client_secret, 20 | tenant_id=self.service_principal.tenant_id) 21 | 22 | return DataFactoryManagementClient(credentials, self.service_principal.subscription_id) 23 | 24 | def exists(self) -> bool: 25 | factory: Optional[Factory] = self.client.factories.get(self.resource_group_name, self.factory_name) 26 | return factory != None 27 | 28 | def run_pipeline(self, pipeline_name: str) -> bool: 29 | run_id: str = self.get_run_id(pipeline_name) 30 | pipeline_run: PipelineRun = self.get_pipeline_run(run_id) 31 | pipeline_run_status: str = self.get_pipeline_run_status(pipeline_run) 32 | 33 | while pipeline_run_status != 'Succeeded': 34 | if pipeline_run_status == 'Failed': 35 | return False 36 | 37 | pipeline_run_status = self.get_pipeline_run_status(pipeline_run) 38 | 39 | return True 40 | 41 | def get_run_id(self, pipeline_name: str) -> str: 42 | run_response: CreateRunResponse = self.client.pipelines.create_run( 43 | resource_group_name=self.resource_group_name, 44 | factory_name=self.factory_name, 45 | pipeline_name=pipeline_name, 46 | parameters={}) 47 | 48 | return run_response.run_id 49 | 50 | def get_pipeline_run(self, run_id: str) -> PipelineRun: 51 | return self.client.pipeline_runs.get(self.resource_group_name, self.factory_name, run_id) 52 | 53 | def get_query_response(self, pipeline_run: PipelineRun) -> ActivityRunsQueryResponse: 54 | last_updated_after: datetime = datetime.now() - timedelta(1) 55 | last_updated_before: datetime = datetime.now() + timedelta(1) 56 | filter_params = RunFilterParameters(last_updated_after=last_updated_after, last_updated_before=last_updated_before) 57 | 58 | return self.client.activity_runs.query_by_pipeline_run( 59 | self.resource_group_name, self.factory_name, pipeline_run.run_id, filter_params) 60 | 61 | def get_pipeline_run_status(self, pipeline_run: 
PipelineRun) -> str: 62 | query_response: ActivityRunsQueryResponse = self.get_query_response(pipeline_run) 63 | 64 | while not query_response.value: 65 | query_response = self.get_query_response(pipeline_run) 66 | time.sleep(1) 67 | 68 | return query_response.value[0].status 69 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/features/environment.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from core.models import ServicePrincipal 3 | from core.services import Adls, DataFactory 4 | import os 5 | 6 | # Service principal environment variables 7 | client_id: str = os.getenv('CLIENT_ID') 8 | client_secret: str = os.getenv('CLIENT_SECRET') 9 | subscription_id: str = os.getenv('SUBSCRIPTION_ID') 10 | tenant_id: str = os.getenv('TENANT_ID') 11 | 12 | # Data Factory environment variables 13 | adf_name: str = os.getenv('ADF_NAME') 14 | resource_group_name: str = os.getenv('RESOURCE_GROUP_NAME') 15 | 16 | # ADLS environment variables 17 | storage_account_name: str = os.getenv('STORAGE_ACCOUNT_NAME') 18 | storage_account_key: str = os.getenv('STORAGE_ACCOUNT_KEY') 19 | 20 | # Pipelines 21 | pipelines: list = ["ProcessFlightsDelaysData"] 22 | 23 | def before_all(context: Any) -> None: 24 | print("Starting the execution of data pipelines...\n") 25 | 26 | adf: DataFactory = setup_data_factory(context) 27 | _ = run_pipelines(adf, context) 28 | _ = setup_adls(context) 29 | 30 | def setup_data_factory(context: Any) -> None: 31 | service_principal = ServicePrincipal(client_id, client_secret, subscription_id, tenant_id) 32 | adf = DataFactory(adf_name, resource_group_name, service_principal) 33 | 34 | if not adf.exists(): 35 | raise Exception(f"The data factory '{adf_name}' could not be found.") 36 | 37 | return adf 38 | 39 | def run_pipelines(adf: DataFactory, context: Any) -> None: 40 | pipeline_results: dict = {} 41 | 42 | # Run all pipelines 43 | for pipeline in pipelines: 44 | pipeline_results[pipeline] = adf.run_pipeline(pipeline) 45 | 46 | context.pipeline_results = pipeline_results 47 | 48 | def setup_adls(context: Any) -> None: 49 | context.adls = Adls(storage_account_name, storage_account_key) 50 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/features/flight_delays.feature: -------------------------------------------------------------------------------- 1 | Feature: Flight Delays 2 | The data pipeline serves weather and flight delay data for: 3 | - Auditing purposes 4 | - Data scientists, enabling them to produce flight delay predictions for customers 5 | - BI analysts, enabling them to analyze the costs of flight delays over time. 
6 | 7 | Background: 8 | Given successful execution of the "ProcessFlightsDelaysData" data pipeline 9 | 10 | Scenario: Weather and flight delay transformed data for data scientists 11 | Given the "trusted" container exists in the data lake 12 | Then at least 100 flights must exist for all airports 13 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/features/steps/common.py: -------------------------------------------------------------------------------- 1 | from behave import given 2 | from typing import Any 3 | 4 | @given('successful execution of the "{pipeline_name}" data pipeline') 5 | def step_impl(context: Any, pipeline_name: str) -> None: 6 | if pipeline_name in context.pipeline_results.keys(): 7 | assert context.pipeline_results[pipeline_name] is True 8 | else: 9 | raise Exception(f"The data pipeline '{pipeline_name}' was not configured on environments.py to be executed.") 10 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/features/steps/flight_delays.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Optional 2 | from behave import given, then 3 | import csv 4 | from io import StringIO 5 | 6 | @given('the "{container_name}" container exists in the data lake') 7 | def check_if_adls_container_exists(context: Any, container_name: str) -> None: 8 | container_exists: bool = context.adls.container_exists(container_name) 9 | context.container_name = container_name 10 | assert container_exists is True 11 | 12 | @then('at least {min_flights:d} flights must exist for all airports') 13 | def check_if_flights_exist(context: Any, min_flights: int) -> None: 14 | log_content: str = context.adls.get_latest_log_content(context.container_name, "logs") 15 | reader = get_csv_reader(log_content) 16 | 17 | for row in reader: 18 | airport_flights: int = int(row[1]) 19 | assert airport_flights > min_flights 20 | 21 | def get_csv_reader(log_content: str): 22 | file_content = StringIO(log_content) 23 | reader = csv.reader(file_content, delimiter=',') 24 | headers: Optional(List[str]) = next(reader, None) 25 | 26 | if not headers: 27 | raise Exception("The log file has no content.") 28 | 29 | return reader 30 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-identity==1.5.0 2 | azure-mgmt-datafactory==1.0.0 3 | azure-mgmt-resource==16.0.0 4 | azure-storage-file-datalake==12.3.0 5 | behave==1.2.6 6 | -------------------------------------------------------------------------------- /data-platform/src/bdd-adf-pipelines/scripts/junit_converter.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | from xml.etree import ElementTree 3 | from xml.dom import minidom 4 | from xml.etree.ElementTree import Element, SubElement, Comment 5 | import sys, json 6 | 7 | def prettify_test_suites_to_xml(test_suites: Element) -> str: 8 | rough_string: str = ElementTree.tostring(test_suites, 'utf-8') 9 | reparsed: str = minidom.parseString(rough_string) 10 | return reparsed.toprettyxml(indent=" ") 11 | 12 | def create_test_case_attrib(test: Any) -> Dict[str, Any]: 13 | return { 14 | 'classname': test['classname'], 15 | 'name': f"{test['step']['keyword']} 
{test['step']['name']}", 16 | 'time': str(test['step']['result']['duration']) 17 | } 18 | 19 | def create_error_test_case_attrib(failed_test: Any) -> Dict[str, str]: 20 | return { 21 | 'message': ' '.join(failed_test['step']['result']['error_message']), 22 | 'type': 'exception' 23 | } 24 | 25 | def create_failed_test_case(test_suite, failed_test: Any) -> Element: 26 | test_case_attributes: Dict[str, Any] = create_test_case_attrib(failed_test) 27 | test_case: Element = SubElement(test_suite, 'testcase', test_case_attributes) 28 | error_test_case_attrib: Dict[str, str] = create_error_test_case_attrib(failed_test) 29 | 30 | return SubElement(test_case, 'error', error_test_case_attrib) 31 | 32 | def create_passed_test_case(test_suite, passed_test: Any) -> Element: 33 | test_case_attrib: Dict[str, Any] = create_test_case_attrib(passed_test) 34 | return SubElement(test_suite, 'testcase', test_case_attrib) 35 | 36 | def create_test_suite(test_suites: Element, test_suite_attributes: Dict[str, str]) -> Element: 37 | return SubElement(test_suites, 'testsuite', test_suite_attributes) 38 | 39 | def test_passed(step: Any) -> bool: 40 | return step['result']['status'] == 'passed' 41 | 42 | def append_test_suite(test_suites: Element, feature: Any) -> None: 43 | passed_tests: List[Dict[str, Any]] = [] 44 | failed_tests: List[Dict[str, Any]] = [] 45 | total_tests_duration_seconds: float = 0.0 46 | 47 | scenarios: List[Any] = [element for element in feature['elements'] if element['type'] == 'scenario'] 48 | 49 | for scenario in scenarios: 50 | steps_with_results: List[Any] = [step for step in scenario['steps'] if 'result' in step.keys()] 51 | 52 | for step in steps_with_results: 53 | test: Dict[str, Any] = {'classname': scenario['name'], 'step': step} 54 | 55 | if test_passed(step): 56 | passed_tests.append(test) 57 | else: 58 | failed_tests.append(test) 59 | 60 | total_tests_duration_seconds = total_tests_duration_seconds + step['result']['duration'] 61 | 62 | test_suite_attributes: Dict[str, str] = { 63 | 'id': '1', 64 | 'name': feature['name'], 65 | 'hostname': 'Azure DevOps', 66 | 'time': str(total_tests_duration_seconds), 67 | 'tests': str(sum([len(scenario['steps']) for scenario in scenarios])), 68 | 'failures': str(len(failed_tests)) 69 | } 70 | 71 | test_suite: Element = create_test_suite(test_suites, test_suite_attributes) 72 | 73 | for failed_test in failed_tests: 74 | _: Element = create_failed_test_case(test_suite, failed_test) 75 | 76 | for passed_test in passed_tests: 77 | _: Element = create_passed_test_case(test_suite, passed_test) 78 | 79 | def create_test_suites(behave_file_path: str) -> str: 80 | test_suites = Element('testsuites') 81 | 82 | with open(behave_file_path) as behave_file: 83 | features: List[Any] = json.load(behave_file) 84 | 85 | for feature in features: 86 | append_test_suite(test_suites, feature) 87 | 88 | return prettify_test_suites_to_xml(test_suites) 89 | 90 | def main(): 91 | print('Converting...') 92 | 93 | behave_test_results_file: str = sys.argv[1] 94 | junit_output_file_path: str = sys.argv[2] 95 | 96 | with open(junit_output_file_path, "w") as junit_output_file: 97 | junit_test_suites: str = create_test_suites(behave_test_results_file) 98 | junit_output_file.write(junit_test_suites) 99 | 100 | print('Done!') 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Change Log 3 | 4 | This document is the change log of the `dataopslib` project. Its format is based on [Keep a Changelog](http://keepachangelog.com/) and adheres to [Semantic Versioning](http://semver.org/). 5 | 6 | ## [Unreleased] - dd-mm-yyyy 7 | 8 | ### **Added** 9 | 10 | - [`package version`] - dd-mm-yyyy 11 | - Feature 1 description 12 | - Feature 2 description 13 | 14 | - [`package version`] - dd-mm-yyyy 15 | - Feature 1 description 16 | - Feature 2 description 17 | 18 | ### **Changed** 19 | 20 | - [`package version`] - dd-mm-yyyy 21 | - change 1 description 22 | - change 2 description 23 | 24 | ### **Fixed** 25 | 26 | - [`package version`] - dd-mm-yyyy 27 | - patch 1 description 28 | - patch 2 description 29 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/MANIFEST.in: -------------------------------------------------------------------------------- 1 | 2 | include setup.py 3 | include dataopslib/spark/data_quality.py 4 | include dataopslib/spark/data_transformation.py 5 | include dataopslib/spark/functions.py 6 | include dataopslib/schemas.py 7 | 8 | recursive-include tests * 9 | recursive-include samples * 10 | recursive-exclude * __pycache__ 11 | recursive-exclude * *.py[co] 12 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/README.md: -------------------------------------------------------------------------------- 1 | # dataopslib 2 | 3 | ## Prerequisites 4 | 5 | - Python 3.x 6 | - Docker + Docker Compose 7 | 8 | ## Getting Started 9 | 10 | Create a virtual environment and install the required packages: 11 | 12 | ```sh 13 | python3 -m venv dataopslib_env 14 | source dataopslib_env/bin/activate 15 | 16 | pip3 install -r requirements.txt 17 | ``` 18 | 19 | ### **Running Apache Spark locally with Docker** 20 | 21 | Open the `spark` folder in your terminal and run Docker Compose to start an Apache Spark instance locally: 22 | 23 | ```sh 24 | docker-compose up 25 | ``` 26 | 27 | ### **Running samples** 28 | 29 | Open the sample files located in the `samples` directory in Visual Studio Code and run them; see the example below for running a sample from the terminal.
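For example, assuming the virtual environment above is active and the command is run from the root of `dataopslib` (so that the samples' relative `./data/...` paths resolve), a sample can be executed directly from the terminal:

```sh
python3 samples/sample_read_csv.py
```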
30 | 31 | ### **Deactivating the virtual environment** 32 | 33 | ```sh 34 | deactivate 35 | ``` 36 | 37 | ### **Building and testing the samples** 38 | 39 | ```sh 40 | flake8 ./dataopslib ./samples ./tests 41 | pytest --ignore=setup.py 42 | python3 setup.py sdist bdist_wheel 43 | ``` 44 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/data/AirportCodeLocationLookupClean.csv: -------------------------------------------------------------------------------- 1 | AIRPORT_ID,AIRPORT,DISPLAY_AIRPORT_NAME,LATITUDE,LONGITUDE 2 | 10001,01A,Afognak Lake Airport,58.10944444,-152.9066667 3 | 10003,03A,Bear Creek Mining Strip,65.54805556,-161.0716667 4 | 10004,04A,Lik Mining Camp,68.08333333,-163.1666667 5 | 10005,05A,Little Squaw Airport,67.57,-148.1838889 6 | 10006,06A,Kizhuyak Bay,57.74527778,-152.8827778 7 | 10007,07A,Klawock Seaplane Base,55.55472222,-133.1016667 8 | 10008,08A,Elizabeth Island Airport,59.15694444,-151.8291667 9 | 10009,09A,Augustin Island,59.36277778,-153.4305556 10 | 10010,1B1,Columbia County,42.29138889,-73.71027778 11 | 10011,1G4,Grand Canyon West,35.98611111,-113.8169444 12 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/data/FlightDelaysWithAirportCodes.csv: -------------------------------------------------------------------------------- 1 | Year,Month,DayofMonth,DayOfWeek,Carrier,CRSDepTime,DepDelay,DepDel15,CRSArrTime,ArrDelay,ArrDel15,Cancelled,OriginAirportCode,OriginAirportName,OriginLatitude,OriginLongitude,DestAirportCode,DestAirportName,DestLatitude,DestLongitude 2 | 2013,4,19,5,DL,837,-3,0,1138,1,0,0,DTW,Detroit Metro Wayne County,42.2125,-83.35333333,MIA,Miami International,25.79527778,-80.29 3 | 2013,4,19,5,DL,1705,0,0,2336,-8,0,0,SLC,Salt Lake City International,40.78833333,-111.9777778,JFK,John F. Kennedy International,40.64,-73.77861111 4 | 2013,4,19,5,DL,600,-4,0,851,-15,0,0,PDX,Portland International,45.58861111,-122.5969444,SLC,Salt Lake City International,40.78833333,-111.9777778 5 | 2013,4,19,5,DL,1630,28,1,1903,24,1,0,STL,Lambert-St. Louis International,38.74861111,-90.37,DTW,Detroit Metro Wayne County,42.2125,-83.35333333 6 | 2013,4,19,5,DL,1615,-6,0,1805,-11,0,0,CVG,Cincinnati/Northern Kentucky International,39.04888889,-84.66777778,LAX,Los Angeles International,33.9425,-118.4080556 7 | 2013,4,19,5,DL,1726,-1,0,1818,-19,0,0,ATL,Hartsfield-Jackson Atlanta International,33.63666667,-84.42777778,STL,Lambert-St. Louis International,38.74861111,-90.37 8 | 2013,4,19,5,DL,1900,0,0,2133,-1,0,0,STL,Lambert-St. 
Louis International,38.74861111,-90.37,ATL,Hartsfield-Jackson Atlanta International,33.63666667,-84.42777778 9 | 2013,4,19,5,DL,2145,15,1,2356,24,1,0,ATL,Hartsfield-Jackson Atlanta International,33.63666667,-84.42777778,SLC,Salt Lake City International,40.78833333,-111.9777778 10 | 2013,4,19,5,DL,2157,33,1,2333,34,1,0,ATL,Hartsfield-Jackson Atlanta International,33.63666667,-84.42777778,AUS,Austin - Bergstrom International,30.19444444,-97.67 11 | 2013,4,19,5,DL,1900,323,1,2055,322,1,0,DCA,Ronald Reagan Washington National,38.85138889,-77.03777778,ATL,Hartsfield-Jackson Atlanta International,33.63666667,-84.42777778 12 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/data/FlightWeatherWithAirportCode.csv: -------------------------------------------------------------------------------- 1 | Year,Month,Day,Time,TimeZone,SkyCondition,Visibility,WeatherType,DryBulbFarenheit,DryBulbCelsius,WetBulbFarenheit,WetBulbCelsius,DewPointFarenheit,DewPointCelsius,RelativeHumidity,WindSpeed,WindDirection,ValueForWindCharacter,StationPressure,PressureTendency,PressureChange,SeaLevelPressure,RecordType,HourlyPrecip,Altimeter,AirportCode,DISPLAY_AIRPORT_NAME,LATITUDE,LONGITUDE 2 | 2013,4,1,56,-4,FEW018 SCT044 BKN070,10.00,-RA,76,24.4,74,23.3,73,22.8,90,13,080,,30.06,,,30.06,AA,T,30.07,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 3 | 2013,4,1,156,-4,FEW037 SCT070,10.00,"",76,24.4,73,22.5,71,21.7,85,10,090,,30.05,6,17,30.05,AA,"",30.06,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 4 | 2013,4,1,256,-4,FEW037 SCT070,10.00,"",76,24.4,73,22.5,71,21.7,85,9,100,,30.03,,,30.03,AA,"",30.04,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 5 | 2013,4,1,356,-4,FEW025 SCT070,10.00,"",76,24.4,72,22.2,70,21.1,82,9,100,,30.02,,,30.03,AA,"",30.03,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 6 | 2013,4,1,456,-4,FEW025,10.00,"",76,24.4,72,22.2,70,21.1,82,7,110,,30.03,5,4,30.04,AA,"",30.04,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 7 | 2013,4,1,556,-4,FEW025 SCT080,10.00,"",76,24.4,71,21.8,69,20.6,79,7,100,,30.04,,,30.05,AA,"",30.05,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 8 | 2013,4,1,656,-4,FEW028 BKN080,10.00,"",77,25.0,71,21.7,68,20.0,74,9,110,,30.07,,,30.07,AA,"",30.08,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 9 | 2013,4,1,756,-4,FEW028 BKN080,10.00,"",79,26.1,72,22.4,69,20.6,72,13,100,,30.09,3,20,30.10,AA,"",30.10,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 10 | 2013,4,1,856,-4,FEW030 BKN080,10.00,"",82,27.8,73,22.9,69,20.6,65,14,100,21,30.11,,,30.11,AA,"",30.12,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 11 | 2013,4,1,956,-4,SCT035 BKN090,10.00,"",83,28.3,74,23.0,69,20.6,63,16,090,23,30.11,,,30.12,AA,"",30.12,SJU,Luis Munoz Marin International,18.43944444,-66.00222222 12 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/data/sample_data.csv: -------------------------------------------------------------------------------- 1 | ID,START_DATE,END_DATE 2 | 1,03/01/2021 09:00:00,03/02/2021 09:00:00 3 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/dataopslib/_init_.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Top-level package for dataopslib.""" 5 | 6 | __author__ = """Marcel Aldecoa""" 7 | 
__email__ = 'marcel.aldecoa@microsoft.com' 8 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/dataopslib/schemas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Main module.""" 5 | 6 | from pyspark.sql.types import (StructType, StructField, StringType, IntegerType, DoubleType, ShortType) 7 | 8 | 9 | def get_schema(schema_name): 10 | 11 | if schema_name == 'sample_schema': 12 | schema = StructType([ 13 | StructField('ID', IntegerType()), 14 | StructField('START_DATE', StringType()), 15 | StructField('END_DATE', StringType()), 16 | ]) 17 | 18 | if schema_name == 'AirportCodeSchema': 19 | schema = StructType([ 20 | StructField('AIRPORT_ID', IntegerType()), 21 | StructField('AIRPORT', StringType()), 22 | StructField('DISPLAY_AIRPORT_NAME', StringType()), 23 | StructField('LATITUDE', DoubleType()), 24 | StructField('LONGITUDE', DoubleType()), 25 | ]) 26 | 27 | if schema_name == 'FlightDelaysSchema': 28 | schema = StructType([ 29 | StructField('Year', IntegerType()), 30 | StructField('Month', IntegerType()), 31 | StructField('DayofMonth', IntegerType()), 32 | StructField('DayOfWeek', IntegerType()), 33 | StructField('Carrier', StringType()), 34 | StructField('CRSDepTime', IntegerType()), 35 | StructField('DepDelay', IntegerType()), 36 | StructField('DepDel15', ShortType()), 37 | StructField('CRSArrTime', IntegerType()), 38 | StructField('ArrDelay', IntegerType()), 39 | StructField('ArrDel15', ShortType()), 40 | StructField('Cancelled', ShortType()), 41 | StructField('OriginAirportCode', StringType()), 42 | StructField('OriginAirportName', StringType()), 43 | StructField('OriginLatitude', DoubleType()), 44 | StructField('OriginLongitude', DoubleType()), 45 | StructField('DestAirportCode', StringType()), 46 | StructField('DestAirportName', StringType()), 47 | StructField('DestLatitude', DoubleType()), 48 | StructField('DestLongitude', DoubleType()), 49 | ]) 50 | 51 | return schema 52 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/dataopslib/spark/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Spark-related functionality.""" 5 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/dataopslib/spark/data_quality.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | This script allows users to validate the quality of the datasets. 6 | """ 7 | 8 | from pyspark.sql.dataframe import DataFrame 9 | from pyspark.sql.functions import to_date 10 | from typing import List 11 | 12 | 13 | def has_null(df: DataFrame, cols: List[str]) -> bool: 14 | """Validates whether or not the DataFrame columns in the list have NULL values. 15 | 16 | Parameters 17 | ---------- 18 | df : DataFrame 19 | The DataFrame where the validation will occur. 20 | cols : list 21 | A list of strings with the name of the columns to be validaded. 22 | 23 | Returns 24 | ------- 25 | bool 26 | Returns a boolean indicating whether or not the columns have NULL values. 
27 | """ 28 | return any([df.filter(df[col].isNull()).count() for col in cols]) 29 | 30 | 31 | def has_invalid_dates(df: DataFrame, cols: List[str], format: str) -> bool: 32 | """Validates whether or not the DataFrame columns in the list have invalid dates. 33 | 34 | Parameters 35 | ---------- 36 | df : DataFrame 37 | The DataFrame where the validation will occur. 38 | cols : list 39 | A list of strings with the name of the columns to be validaded. 40 | format: str 41 | The date/time format to be used. 42 | 43 | Returns 44 | ------- 45 | bool 46 | Returns a boolean indicating whether or not the columns have invalid dates. 47 | """ 48 | return any([df.filter(to_date(df[col], format).isNull()).count() for col in cols]) 49 | 50 | 51 | def has_inconsistent_dates(df: DataFrame, dateColumn1: str, dateColumn2: str, format: str, operator: classmethod) -> bool: 52 | """Validates whether or not the DataFrame has dateColumn1 and dateColumn2 inconsistent based on the operator 53 | used for the comparison. 54 | 55 | Parameters 56 | ---------- 57 | df : DataFrame 58 | The DataFrame where the validation will occur. 59 | dateColumn1: str 60 | The date column used in the left side of the operator function. 61 | dateColumn2: str 62 | The date column used in the right side of the operator function. 63 | format: str 64 | The date/time format to be used. 65 | operator: classmethod 66 | The operator function to be used for the comparison, such as: 67 | operator.lt 68 | operator.le 69 | operator.eq 70 | operator.ne 71 | operator.ge 72 | operator.gt 73 | 74 | Returns 75 | ------- 76 | bool 77 | Returns a boolean indicating whether or not the DataFrame has dateColumn1 and dateColumn2 inconsistent. 78 | """ 79 | return df.filter(operator(to_date(df[dateColumn1], format), to_date(df[dateColumn2], format))).count() > 0 80 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/dataopslib/spark/functions.py: -------------------------------------------------------------------------------- 1 | """This modules contains UDF functions that expands the regular Spark SQL language capabilities. 2 | """ 3 | import datetime 4 | import logging 5 | from dateutil.tz import tzlocal 6 | 7 | from pyspark.sql.types import TimestampType 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | try: 12 | from pyspark.sql import SparkSession 13 | _spark = SparkSession.builder.getOrCreate() 14 | 15 | add_seconds = _spark.udf.register("add_seconds", 16 | lambda dt, seconds: (dt + datetime.timedelta(seconds=int(seconds))).replace(tzinfo=tzlocal()), 17 | returnType=TimestampType()) 18 | 19 | add_minutes = _spark.udf.register("add_minutes", 20 | lambda dt, minutes: (dt + datetime.timedelta(minutes=int(minutes))).replace(tzinfo=tzlocal()), 21 | returnType=TimestampType()) 22 | 23 | add_hours = _spark.udf.register("add_hours", 24 | lambda dt, hours: (dt + datetime.timedelta(hours=int(hours))).replace(tzinfo=tzlocal()), 25 | returnType=TimestampType()) 26 | except ImportError: 27 | logger.error('Spark is not available in this context. 
This module functionality will be limited') 28 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -p no:warnings 3 | junit_suite_name = dataopslib 4 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/requirements.txt: -------------------------------------------------------------------------------- 1 | pip 2 | wheel 3 | flake8 4 | flake8-junit-report==2.1.0 5 | python-dateutil==2.8.1 6 | coverage 7 | twine 8 | pytest 9 | pytest-runner 10 | databricks-cli 11 | pyspark==3.0.1 -------------------------------------------------------------------------------- /data-platform/src/dataopslib/samples/sample_has_inconsistent_dates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pyspark.sql import SparkSession 5 | import operator 6 | try: 7 | import dataopslib.spark.data_quality as quality 8 | import dataopslib.schemas as schemas 9 | except ImportError: 10 | import os 11 | import sys 12 | cur_dir = os.path.dirname(__file__) 13 | # Add the parent directory in the search for modules when importing 14 | sys.path.append(os.path.abspath(os.path.join(cur_dir, os.pardir))) 15 | import dataopslib.spark.data_quality as quality 16 | import dataopslib.schemas as schemas 17 | 18 | 19 | spark = SparkSession.builder\ 20 | .master("local")\ 21 | .appName("sample_has_inconsistent_dates.py")\ 22 | .getOrCreate() 23 | spark.sparkContext.setLogLevel("ERROR") 24 | 25 | schema = schemas.get_schema("sample_schema") 26 | df = spark.read.csv("./data/sample_data.csv", header=True, schema=schema) 27 | 28 | has_inconsistent_dates = quality.has_inconsistent_dates(df, 'START_DATE', 'END_DATE', 29 | 'MM/dd/yyyy HH:mm:ss', 30 | operator.gt) 31 | if has_inconsistent_dates: 32 | print("The dataframe has START_DATE greater than END_DATE in the columns") 33 | else: 34 | print("The dataframe doesn't have START_DATE greater than END_DATE in the columns") 35 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/samples/sample_has_null.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pyspark.sql import SparkSession 5 | try: 6 | import dataopslib.spark.data_quality as quality 7 | import dataopslib.schemas as schemas 8 | except ImportError: 9 | import os 10 | import sys 11 | cur_dir = os.path.dirname(__file__) 12 | # Add the parent directory in the search for modules when importing 13 | sys.path.append(os.path.abspath(os.path.join(cur_dir, os.pardir))) 14 | import dataopslib.spark.data_quality as quality 15 | import dataopslib.schemas as schemas 16 | 17 | 18 | spark = SparkSession.builder\ 19 | .master("local")\ 20 | .appName("sample_has_null.py")\ 21 | .getOrCreate() 22 | spark.sparkContext.setLogLevel("ERROR") 23 | 24 | schema = schemas.get_schema("sample_schema") 25 | df = spark.read.csv("./data/sample_data.csv", header=True, schema=schema) 26 | 27 | cols = ['END_DATE', 'START_DATE'] 28 | has_null = quality.has_null(df, cols) 29 | if has_null: 30 | print("The dataframe has nulls in the columns", cols) 31 | else: 32 | print("The dataframe doesn't have nulls in the columns", cols) 33 | 
-------------------------------------------------------------------------------- /data-platform/src/dataopslib/samples/sample_has_valid_dates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pyspark.sql import SparkSession 5 | try: 6 | import dataopslib.spark.data_quality as quality 7 | import dataopslib.schemas as schemas 8 | except ImportError: 9 | import os 10 | import sys 11 | cur_dir = os.path.dirname(__file__) 12 | # Add the parent directory in the search for modules when importing 13 | sys.path.append(os.path.abspath(os.path.join(cur_dir, os.pardir))) 14 | import dataopslib.spark.data_quality as quality 15 | import dataopslib.schemas as schemas 16 | 17 | 18 | spark = SparkSession.builder\ 19 | .master("local")\ 20 | .appName("sample_has_valid_dates.py")\ 21 | .getOrCreate() 22 | spark.sparkContext.setLogLevel("ERROR") 23 | 24 | schema = schemas.get_schema("sample_schema") 25 | df = spark.read.csv("./data/sample_data.csv", header=True, schema=schema) 26 | 27 | cols = ['END_DATE', 'START_DATE'] 28 | has_invalid_dates = quality.has_invalid_dates(df, cols, 'MM/dd/yyyy HH:mm:ss') 29 | if has_invalid_dates: 30 | print("The dataframe has invalid dates in the columns", cols) 31 | else: 32 | print("The dataframe doesn't have invalid dates in the columns", cols) 33 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/samples/sample_read_csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pyspark.sql import SparkSession 5 | try: 6 | import dataopslib.schemas as schemas 7 | except ImportError: 8 | import os 9 | import sys 10 | cur_dir = os.path.dirname(__file__) 11 | # Add the parent directory in the search for modules when importing 12 | sys.path.append(os.path.abspath(os.path.join(cur_dir, os.pardir))) 13 | import dataopslib.schemas as schemas 14 | 15 | spark = SparkSession.builder\ 16 | .master("local")\ 17 | .appName("sample_read_csv.py")\ 18 | .getOrCreate() 19 | spark.sparkContext.setLogLevel("ERROR") 20 | 21 | schema = schemas.get_schema("AirportCodeSchema") 22 | df = spark.read.csv("./data/AirportCodeLocationLookupClean.csv", header=True, schema=schema) 23 | df.printSchema() 24 | df.show() 25 | 26 | schema = schemas.get_schema("FlightDelaysSchema") 27 | df = spark.read.csv("./data/FlightDelaysWithAirportCodes.csv", header=True, schema=schema) 28 | df.printSchema() 29 | df.show() 30 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/samples/sample_transformation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pyspark.sql import SparkSession 5 | try: 6 | import dataopslib.schemas as schemas 7 | import dataopslib.spark.data_transformation as transformation 8 | except ImportError: 9 | import os 10 | import sys 11 | cur_dir = os.path.dirname(__file__) 12 | # Add the parent directory in the search for modules when importing 13 | sys.path.append(os.path.abspath(os.path.join(cur_dir, os.pardir))) 14 | import dataopslib.schemas as schemas 15 | import dataopslib.spark.data_transformation as transformation 16 | 17 | spark = SparkSession.builder\ 18 | .master("local")\ 19 | .appName("sample_read_csv.py")\ 20 | .getOrCreate() 21 | spark.sparkContext.setLogLevel("ERROR") 22 | 
23 | schema = schemas.get_schema("sample_schema") 24 | df = spark.read.csv("./data/sample_data.csv", header=True, schema=schema) 25 | 26 | print("Original data sample...") 27 | df.show() 28 | 29 | print("Running date timezone transformations") 30 | newDF = transformation.transform_date_time_zone(df, "START_DATE", 31 | "America/Sao_Paulo") 32 | 33 | print("Adding unixtime column") 34 | newDF = transformation.add_unixtime_column(newDF, "START_DATE", 35 | 'MM/dd/yyyy HH:mm:ss', 'START_DATE_UNIX') 36 | 37 | print("Dropping duplicate rows") 38 | newDF = transformation.drop_duplicates_rows(newDF, "ID") 39 | 40 | print("Dropping some columns") 41 | newDF = transformation.drop_columns(newDF, "ID") 42 | 43 | print("Here is what I got after running some transformations...") 44 | newDF.show() 45 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [flake8] 5 | exclude = docs 6 | max-line-length = 150 7 | 8 | [aliases] 9 | # Define setup.py command aliases here 10 | test = pytest 11 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """The setup script.""" 5 | 6 | import os 7 | from setuptools import setup, find_packages 8 | 9 | version = os.environ.get("package_version") 10 | 11 | with open('README.md') as readme_file: 12 | readme = readme_file.read() 13 | 14 | requirements = [] 15 | setup_requirements = ['pytest-runner'] 16 | test_requirements = ['pytest'] 17 | 18 | setup( 19 | author="Marcel Aldecoa", 20 | author_email='marcel.aldecoa@microsoft.com', 21 | classifiers=[ 22 | 'Development Status :: 2 - Pre-Alpha', 23 | 'Intended Audience :: Developers', 24 | 'Natural Language :: English', 25 | 'Programming Language :: Python :: 3.6' 26 | ], 27 | description="The dataopslib library contains all the data quality and data transformation logic for the Adventure Works DataOps project.", 28 | install_requires=requirements, 29 | long_description=readme, 30 | long_description_content_type='text/markdown', 31 | include_package_data=True, 32 | keywords=[ 33 | 'dataopslib', 34 | 'data_quality', 35 | 'data_transformation' 36 | ], 37 | name='dataopslib', 38 | packages=find_packages(include=['dataopslib', 'dataopslib.*']), 39 | setup_requires=setup_requirements, 40 | test_suite='tests', 41 | tests_require=test_requirements, 42 | url='https://dev.azure.com/csu-devsquad/advworks-dataops/_git/hol', 43 | version=version, 44 | python_requires='>=3.6' 45 | ) 46 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/spark/README.md: -------------------------------------------------------------------------------- 1 | # Running Spark Locally 2 | 3 | ## Pre-requisites 4 | 5 | - [Docker](https://www.docker.com/) 6 | - [JDK 11](https://openjdk.java.net/projects/jdk/11/) 7 | 8 | ## Installing Java JDK 11 9 | 10 | ```shell 11 | sudo apt update 12 | sudo apt install openjdk-11-jdk 13 | 14 | java --version 15 | ``` 16 | ## Running Docker Compose 17 | 18 | The Docker image used is the [Bitnami Spark](https://hub.docker.com/r/bitnami/spark/).
19 | 20 | ```shell 21 | docker-compose up 22 | ``` -------------------------------------------------------------------------------- /data-platform/src/dataopslib/spark/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | spark: 5 | image: docker.io/bitnami/spark:3-debian-10 6 | environment: 7 | - SPARK_MODE=master 8 | - SPARK_RPC_AUTHENTICATION_ENABLED=no 9 | - SPARK_RPC_ENCRYPTION_ENABLED=no 10 | - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no 11 | - SPARK_SSL_ENABLED=no 12 | ports: 13 | - '8080:8080' 14 | spark-worker-1: 15 | image: docker.io/bitnami/spark:3-debian-10 16 | environment: 17 | - SPARK_MODE=worker 18 | - SPARK_MASTER_URL=spark://spark:7077 19 | - SPARK_WORKER_MEMORY=1G 20 | - SPARK_WORKER_CORES=1 21 | - SPARK_RPC_AUTHENTICATION_ENABLED=no 22 | - SPARK_RPC_ENCRYPTION_ENABLED=no 23 | - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no 24 | - SPARK_SSL_ENABLED=no 25 | spark-worker-2: 26 | image: docker.io/bitnami/spark:3-debian-10 27 | environment: 28 | - SPARK_MODE=worker 29 | - SPARK_MASTER_URL=spark://spark:7077 30 | - SPARK_WORKER_MEMORY=1G 31 | - SPARK_WORKER_CORES=1 32 | - SPARK_RPC_AUTHENTICATION_ENABLED=no 33 | - SPARK_RPC_ENCRYPTION_ENABLED=no 34 | - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no 35 | - SPARK_SSL_ENABLED=no 36 | -------------------------------------------------------------------------------- /data-platform/src/dataopslib/tests/test_data_quality.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Tests for `dataopslib.data_quality` module.""" 5 | 6 | import pytest 7 | import operator 8 | try: 9 | import dataopslib.spark.data_quality as quality 10 | import dataopslib.schemas as schemas 11 | except ImportError: 12 | import os 13 | import sys 14 | cur_dir = os.path.dirname(__file__) 15 | # Add the parent directory in the search for modules when importing 16 | sys.path.append(os.path.abspath(os.path.join(cur_dir, os.pardir))) 17 | import dataopslib.spark.data_quality as quality 18 | import dataopslib.schemas as schemas 19 | 20 | 21 | @pytest.fixture 22 | def spark(): 23 | """Spark Session fixture 24 | """ 25 | from pyspark.sql import SparkSession 26 | 27 | spark = SparkSession.builder\ 28 | .master("local")\ 29 | .appName("Unit Testing")\ 30 | .getOrCreate() 31 | spark.sparkContext.setLogLevel("ERROR") 32 | return spark 33 | 34 | 35 | def test_has_null(spark): 36 | 37 | schema = schemas.get_schema("sample_schema") 38 | df = spark.read.csv("./data/sample_data.csv", header=True, schema=schema) 39 | 40 | cols = ['END_DATE', 'START_DATE'] 41 | has_null = quality.has_null(df, cols) 42 | 43 | assert has_null is False 44 | 45 | 46 | def test_has_invalid_dates(spark): 47 | 48 | schema = schemas.get_schema("sample_schema") 49 | df = spark.read.csv("./data/sample_data.csv", header=True, schema=schema) 50 | 51 | cols = ['END_DATE', 'START_DATE'] 52 | has_invalid_dates = quality.has_invalid_dates(df, cols, 'MM/dd/yyyy HH:mm:ss') 53 | 54 | assert has_invalid_dates is False 55 | 56 | 57 | def test_has_inconsistent_dates_operator_gt(spark): 58 | schema = schemas.get_schema("sample_schema") 59 | df = spark.read.csv("./data/sample_data.csv", header=True, schema=schema) 60 | 61 | has_inconsistent_dates = quality.has_inconsistent_dates(df, 'START_DATE', 'END_DATE', 62 | 'MM/dd/yyyy HH:mm:ss', 63 | operator.gt) 64 | 65 | assert has_inconsistent_dates is False 66 | 67 | 68 | def 
test_has_inconsistent_dates_operator_lt(spark): 69 | schema = schemas.get_schema("sample_schema") 70 | df = spark.read.csv("./data/sample_data.csv", header=True, schema=schema) 71 | 72 | has_inconsistent_dates = quality.has_inconsistent_dates(df, 'START_DATE', 'END_DATE', 73 | 'MM/dd/yyyy HH:mm:ss', 74 | operator.lt) 75 | 76 | assert has_inconsistent_dates is True 77 | -------------------------------------------------------------------------------- /hands-on-lab/lab-files/my-lab-file.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-1.png -------------------------------------------------------------------------------- /hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-2.png -------------------------------------------------------------------------------- /hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-3.png -------------------------------------------------------------------------------- /hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-4.png -------------------------------------------------------------------------------- /hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/02-One-Notebook-to-Rule-Them-All-5.png -------------------------------------------------------------------------------- /hands-on-lab/media/89-git-repositories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/89-git-repositories.png -------------------------------------------------------------------------------- /hands-on-lab/media/91-git-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/91-git-workflow.png -------------------------------------------------------------------------------- /hands-on-lab/media/92-git-workflow-databricks-notebooks.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/92-git-workflow-databricks-notebooks.png -------------------------------------------------------------------------------- /hands-on-lab/media/92-git-workflow-library.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/92-git-workflow-library.png -------------------------------------------------------------------------------- /hands-on-lab/media/93-naming-conventions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/93-naming-conventions.png -------------------------------------------------------------------------------- /hands-on-lab/media/94-release-lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/94-release-lifecycle.png -------------------------------------------------------------------------------- /hands-on-lab/media/ADFPipelineRunning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/ADFPipelineRunning.png -------------------------------------------------------------------------------- /hands-on-lab/media/CI-Iac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/CI-Iac.png -------------------------------------------------------------------------------- /hands-on-lab/media/PRDEV2QA-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/PRDEV2QA-1.png -------------------------------------------------------------------------------- /hands-on-lab/media/PRDEV2QA-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/PRDEV2QA-2.png -------------------------------------------------------------------------------- /hands-on-lab/media/PRDEV2QA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/PRDEV2QA.png -------------------------------------------------------------------------------- /hands-on-lab/media/Pipelines-ADF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Pipelines-ADF.png -------------------------------------------------------------------------------- /hands-on-lab/media/Pipelines-Databricks.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Pipelines-Databricks.png -------------------------------------------------------------------------------- /hands-on-lab/media/Pipelines-IaC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Pipelines-IaC.png -------------------------------------------------------------------------------- /hands-on-lab/media/Pipelines-lib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Pipelines-lib.png -------------------------------------------------------------------------------- /hands-on-lab/media/Pipelines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Pipelines.png -------------------------------------------------------------------------------- /hands-on-lab/media/RGComputeDev.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/RGComputeDev.png -------------------------------------------------------------------------------- /hands-on-lab/media/RGDataDev.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/RGDataDev.png -------------------------------------------------------------------------------- /hands-on-lab/media/Run-CDPipeline-ADF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Run-CDPipeline-ADF.png -------------------------------------------------------------------------------- /hands-on-lab/media/Run-CDPipeline-ADFGood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Run-CDPipeline-ADFGood.png -------------------------------------------------------------------------------- /hands-on-lab/media/Run-CDPipeline-Databricks-Lib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Run-CDPipeline-Databricks-Lib.png -------------------------------------------------------------------------------- /hands-on-lab/media/Run-CDPipeline-Databricks-Notebooks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Run-CDPipeline-Databricks-Notebooks.png -------------------------------------------------------------------------------- /hands-on-lab/media/Run-CDPipeline-Iac.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Run-CDPipeline-Iac.png -------------------------------------------------------------------------------- /hands-on-lab/media/Run-CDPipeline-lib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Run-CDPipeline-lib.png -------------------------------------------------------------------------------- /hands-on-lab/media/Run-CIPipeline-Databricks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Run-CIPipeline-Databricks.png -------------------------------------------------------------------------------- /hands-on-lab/media/Run-CIPipeline-Iac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Run-CIPipeline-Iac.png -------------------------------------------------------------------------------- /hands-on-lab/media/Run-CIPipeline-lib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Run-CIPipeline-lib.png -------------------------------------------------------------------------------- /hands-on-lab/media/SP-secret.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/SP-secret.png -------------------------------------------------------------------------------- /hands-on-lab/media/Versionlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/Versionlib.png -------------------------------------------------------------------------------- /hands-on-lab/media/adf-copy-data-blob-storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/adf-copy-data-blob-storage.png -------------------------------------------------------------------------------- /hands-on-lab/media/adf-dataops-eastus2-dev-author.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/adf-dataops-eastus2-dev-author.png -------------------------------------------------------------------------------- /hands-on-lab/media/adf-dataops-eastus2-dev-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/adf-dataops-eastus2-dev-overview.png -------------------------------------------------------------------------------- /hands-on-lab/media/adf-dataops-eastus2-dev-process-data.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/adf-dataops-eastus2-dev-process-data.png -------------------------------------------------------------------------------- /hands-on-lab/media/adf-dataops-eastus2-dev-workspace1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/adf-dataops-eastus2-dev-workspace1.png -------------------------------------------------------------------------------- /hands-on-lab/media/airport-codes-source-csv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/airport-codes-source-csv.png -------------------------------------------------------------------------------- /hands-on-lab/media/airport-codes-sync.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/airport-codes-sync.png -------------------------------------------------------------------------------- /hands-on-lab/media/alpbaVersionlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/alpbaVersionlib.png -------------------------------------------------------------------------------- /hands-on-lab/media/behave-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/behave-results.png -------------------------------------------------------------------------------- /hands-on-lab/media/behave-script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/behave-script.png -------------------------------------------------------------------------------- /hands-on-lab/media/betaVersionlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/betaVersionlib.png -------------------------------------------------------------------------------- /hands-on-lab/media/branch-policies-builder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/branch-policies-builder.png -------------------------------------------------------------------------------- /hands-on-lab/media/branch-policies-own-owner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/branch-policies-own-owner.png -------------------------------------------------------------------------------- 
/hands-on-lab/media/compute-template-json.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/compute-template-json.png -------------------------------------------------------------------------------- /hands-on-lab/media/copy-airport-codes-sink.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/copy-airport-codes-sink.png -------------------------------------------------------------------------------- /hands-on-lab/media/copy-airport-codes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/copy-airport-codes.png -------------------------------------------------------------------------------- /hands-on-lab/media/copy-value-clientsecret.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/copy-value-clientsecret.png -------------------------------------------------------------------------------- /hands-on-lab/media/dbw-dataops-attaching-cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/dbw-dataops-attaching-cluster.png -------------------------------------------------------------------------------- /hands-on-lab/media/dbw-dataops-eastus2-dev-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/dbw-dataops-eastus2-dev-overview.png -------------------------------------------------------------------------------- /hands-on-lab/media/dbw-dataops-eastus2-dev-ws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/dbw-dataops-eastus2-dev-ws.png -------------------------------------------------------------------------------- /hands-on-lab/media/dbw-dataops-new-cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/dbw-dataops-new-cluster.png -------------------------------------------------------------------------------- /hands-on-lab/media/environments-DEV-Databricks-Notebooks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/environments-DEV-Databricks-Notebooks.png -------------------------------------------------------------------------------- /hands-on-lab/media/environments-DEV-Databricks.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/environments-DEV-Databricks.png -------------------------------------------------------------------------------- /hands-on-lab/media/environments-qa-prod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/environments-qa-prod.png -------------------------------------------------------------------------------- /hands-on-lab/media/environments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/environments.png -------------------------------------------------------------------------------- /hands-on-lab/media/globaltable-flight_delays_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/globaltable-flight_delays_view.png -------------------------------------------------------------------------------- /hands-on-lab/media/high-level-overview-dataops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/high-level-overview-dataops.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-ci.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-ci.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-file-corejson-databricks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-file-corejson-databricks.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-folder-databricks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-folder-databricks.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-folder-infrastructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-folder-infrastructure.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-folder-linkedtemplates-subfolders.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-folder-linkedtemplates-subfolders.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-folder-linkedtemplates.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-folder-linkedtemplates.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-folder-parameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-folder-parameters.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-folder-subfolder-tests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-folder-subfolder-tests.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-linkedtemplates-template-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-linkedtemplates-template-compute.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-ordem-scripts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-ordem-scripts.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-scripts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-scripts.png -------------------------------------------------------------------------------- /hands-on-lab/media/iac-service-principal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/iac-service-principal.png -------------------------------------------------------------------------------- /hands-on-lab/media/infrastructure-as-code-folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/infrastructure-as-code-folder.png -------------------------------------------------------------------------------- /hands-on-lab/media/lakedataopseastus2dev-airport-metadata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/lakedataopseastus2dev-airport-metadata.png -------------------------------------------------------------------------------- /hands-on-lab/media/lakedataopseastus2dev-layer-landing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/lakedataopseastus2dev-layer-landing.png 
-------------------------------------------------------------------------------- /hands-on-lab/media/lakedataopseastus2dev-layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/lakedataopseastus2dev-layers.png -------------------------------------------------------------------------------- /hands-on-lab/media/lakedataopseastus2dev-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/lakedataopseastus2dev-overview.png -------------------------------------------------------------------------------- /hands-on-lab/media/last-pipeline-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/last-pipeline-run.png -------------------------------------------------------------------------------- /hands-on-lab/media/mount-adls-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/mount-adls-1.png -------------------------------------------------------------------------------- /hands-on-lab/media/mount-adls-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/mount-adls-2.png -------------------------------------------------------------------------------- /hands-on-lab/media/notebook-01-adls-mount.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/notebook-01-adls-mount.png -------------------------------------------------------------------------------- /hands-on-lab/media/notebook-01-adls-runcell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/notebook-01-adls-runcell.png -------------------------------------------------------------------------------- /hands-on-lab/media/parameters-dev-json.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/parameters-dev-json.png -------------------------------------------------------------------------------- /hands-on-lab/media/pipeline-run-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/pipeline-run-results.png -------------------------------------------------------------------------------- /hands-on-lab/media/pipeline-stages-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/pipeline-stages-run.png 
-------------------------------------------------------------------------------- /hands-on-lab/media/pipeline-trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/pipeline-trigger.png -------------------------------------------------------------------------------- /hands-on-lab/media/rcVersionlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/rcVersionlib.png -------------------------------------------------------------------------------- /hands-on-lab/media/resource-groups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/resource-groups.png -------------------------------------------------------------------------------- /hands-on-lab/media/rg-dataops-compute-dev.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/rg-dataops-compute-dev.png -------------------------------------------------------------------------------- /hands-on-lab/media/rg-dataops-data-dev.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/rg-dataops-data-dev.png -------------------------------------------------------------------------------- /hands-on-lab/media/scope-dataops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/scope-dataops.png -------------------------------------------------------------------------------- /hands-on-lab/media/select-test-yml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/select-test-yml.png -------------------------------------------------------------------------------- /hands-on-lab/media/stgdataopseastus2dev-airport-metadata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/stgdataopseastus2dev-airport-metadata.png -------------------------------------------------------------------------------- /hands-on-lab/media/stgdataopseastus2dev-containers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/stgdataopseastus2dev-containers.png -------------------------------------------------------------------------------- /hands-on-lab/media/stgdataopseastus2dev.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/stgdataopseastus2dev.png 
-------------------------------------------------------------------------------- /hands-on-lab/media/task03_01-library-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/task03_01-library-workflow.png -------------------------------------------------------------------------------- /hands-on-lab/media/task03_02-artifacts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/task03_02-artifacts.png -------------------------------------------------------------------------------- /hands-on-lab/media/task03_02-artifactsliboverview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/task03_02-artifactsliboverview.png -------------------------------------------------------------------------------- /hands-on-lab/media/task03_04-artifactsliboverview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/task03_04-artifactsliboverview.png -------------------------------------------------------------------------------- /hands-on-lab/media/task2_01-Exploring-Python-Custom-Libraries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/task2_01-Exploring-Python-Custom-Libraries.png -------------------------------------------------------------------------------- /hands-on-lab/media/task2_02-Exploring-Python-Custom-Libraries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/task2_02-Exploring-Python-Custom-Libraries.png -------------------------------------------------------------------------------- /hands-on-lab/media/task2_03-Exploring-Python-Custom-Libraries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/task2_03-Exploring-Python-Custom-Libraries.png -------------------------------------------------------------------------------- /hands-on-lab/media/task2_04-Exploring-Python-Custom-Libraries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/task2_04-Exploring-Python-Custom-Libraries.png -------------------------------------------------------------------------------- /hands-on-lab/media/templates-folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/hands-on-lab/media/templates-folder.png -------------------------------------------------------------------------------- /infrastructure-as-code/GitVersion.yml: 
-------------------------------------------------------------------------------- 1 | next-version: 0.1 2 | mode: ContinuousDeployment 3 | continuous-delivery-fallback-tag: prod -------------------------------------------------------------------------------- /infrastructure-as-code/databricks/dev/interactive.json: -------------------------------------------------------------------------------- 1 | { 2 | "cluster_name": "interactive-cluster", 3 | "spark_version": "15.4.x-scala2.12", 4 | "azure_attributes": { 5 | "first_on_demand": 1, 6 | "availability": "ON_DEMAND_AZURE", 7 | "spot_bid_max_price": -1 8 | }, 9 | "node_type_id": "Standard_DS3_v2", 10 | "spark_env_vars": { 11 | "PYSPARK_PYTHON": "/databricks/python3/bin/python3" 12 | }, 13 | "autotermination_minutes": 120, 14 | "single_user_name": "alopezmoreno@mngenvmcap602086.onmicrosoft.com", 15 | "data_security_mode": "SINGLE_USER", 16 | "runtime_engine": "PHOTON", 17 | "autoscale": { 18 | "min_workers": 2, 19 | "max_workers": 4 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /infrastructure-as-code/databricks/prod/interactive.json: -------------------------------------------------------------------------------- 1 | { 2 | "cluster_name": "interactive-cluster", 3 | "spark_version": "15.4.x-scala2.12", 4 | "azure_attributes": { 5 | "first_on_demand": 1, 6 | "availability": "ON_DEMAND_AZURE", 7 | "spot_bid_max_price": -1 8 | }, 9 | "node_type_id": "Standard_DS3_v2", 10 | "spark_env_vars": { 11 | "PYSPARK_PYTHON": "/databricks/python3/bin/python3" 12 | }, 13 | "autotermination_minutes": 120, 14 | "single_user_name": "alopezmoreno@mngenvmcap602086.onmicrosoft.com", 15 | "data_security_mode": "SINGLE_USER", 16 | "runtime_engine": "PHOTON", 17 | "autoscale": { 18 | "min_workers": 2, 19 | "max_workers": 4 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /infrastructure-as-code/databricks/qa/interactive.json: -------------------------------------------------------------------------------- 1 | { 2 | "cluster_name": "interactive-cluster", 3 | "spark_version": "15.4.x-scala2.12", 4 | "azure_attributes": { 5 | "first_on_demand": 1, 6 | "availability": "ON_DEMAND_AZURE", 7 | "spot_bid_max_price": -1 8 | }, 9 | "node_type_id": "Standard_DS3_v2", 10 | "spark_env_vars": { 11 | "PYSPARK_PYTHON": "/databricks/python3/bin/python3" 12 | }, 13 | "autotermination_minutes": 120, 14 | "single_user_name": "alopezmoreno@mngenvmcap602086.onmicrosoft.com", 15 | "data_security_mode": "SINGLE_USER", 16 | "runtime_engine": "PHOTON", 17 | "autoscale": { 18 | "min_workers": 2, 19 | "max_workers": 4 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /infrastructure-as-code/databricks/sandbox/core.json: -------------------------------------------------------------------------------- 1 | { 2 | "cluster_name": "dataops-cluster", 3 | "autoscale": { 4 | "min_workers": 1, 5 | "max_workers": 2 6 | }, 7 | "spark_version": "7.5.x-scala2.12", 8 | "spark_conf": { 9 | "spark.databricks.delta.preview.enabled": "true" 10 | }, 11 | "azure_attributes": { 12 | "first_on_demand": 1, 13 | "availability": "ON_DEMAND_AZURE", 14 | "spot_bid_max_price": -1 15 | }, 16 | "node_type_id": "Standard_DS3_v2", 17 | "driver_node_type_id": "Standard_DS3_v2", 18 | "ssh_public_keys": [], 19 | "custom_tags": {}, 20 | "spark_env_vars": { 21 | "PYSPARK_PYTHON": "/databricks/python3/bin/python3" 22 | }, 23 | "autotermination_minutes": 60, 24 | 
"enable_elastic_disk": true, 25 | "cluster_source": "API", 26 | "init_scripts": [] 27 | } -------------------------------------------------------------------------------- /infrastructure-as-code/infrastructure/linkedTemplates/data/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "location": { 6 | "type": "string" 7 | }, 8 | "dataSourceStorageAccountName": { 9 | "type": "string" 10 | }, 11 | "dataLakeName": { 12 | "type": "string" 13 | }, 14 | "dataLakeSkuName": { 15 | "type": "string", 16 | "defaultValue": "Standard_LRS", 17 | "allowedValues": [ 18 | "Standard_LRS", 19 | "Standard_GRS", 20 | "Standard_ZRS" 21 | ], 22 | "metadata": { 23 | "description": "Storage Account SKU" 24 | } 25 | } 26 | }, 27 | "variables": { 28 | "datalakeFileSystems": [ "landing", "trusted", "refined" ] 29 | }, 30 | "resources": [ 31 | { 32 | "type": "Microsoft.Storage/storageAccounts", 33 | "apiVersion": "2019-06-01", 34 | "name": "[parameters('dataLakeName')]", 35 | "location": "[parameters('location')]", 36 | "sku": { 37 | "name": "[parameters('dataLakeSkuName')]", 38 | "tier": "Standard" 39 | }, 40 | "kind": "StorageV2", 41 | "properties": { 42 | "accessTier": "Hot", 43 | "isHnsEnabled": true, 44 | "supportsHttpsTrafficOnly": true, 45 | "minimumTlsVersion": "TLS1_2" 46 | } 47 | }, 48 | { 49 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 50 | "apiVersion": "2019-06-01", 51 | "name": "[concat(parameters('dataLakeName'), '/default/', variables('datalakeFileSystems')[copyIndex()])]", 52 | "copy": { 53 | "name": "lakeContainersCopy", 54 | "count": "[length(variables('datalakeFileSystems'))]" 55 | }, 56 | "dependsOn": [ 57 | "[parameters('dataLakeName')]" 58 | ] 59 | }, 60 | { 61 | "name": "[parameters('dataSourceStorageAccountName')]", 62 | "type": "Microsoft.Storage/storageAccounts", 63 | "apiVersion": "2019-06-01", 64 | "location": "[parameters('location')]", 65 | "properties": { 66 | "accessTier": "Hot", 67 | "minimumTlsVersion": "TLS1_2", 68 | "supportsHttpsTrafficOnly": true, 69 | "allowBlobPublicAccess": true, 70 | "allowSharedKeyAccess": true, 71 | "networkAcls": { 72 | "bypass": "AzureServices", 73 | "defaultAction": "Allow", 74 | "ipRules": [] 75 | } 76 | }, 77 | "dependsOn": [], 78 | "sku": { 79 | "name": "Standard_LRS" 80 | }, 81 | "kind": "StorageV2", 82 | "tags": {} 83 | }, 84 | { 85 | "name": "[concat(parameters('dataSourceStorageAccountName'), '/default/', 'flights-data')]", 86 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 87 | "apiVersion": "2019-06-01", 88 | "dependsOn": [ 89 | "[parameters('dataSourceStorageAccountName')]" 90 | ] 91 | } 92 | ], 93 | "outputs": {} 94 | } -------------------------------------------------------------------------------- /infrastructure-as-code/infrastructure/linkedTemplates/roleAssigments/compute.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "databricksName": { 6 | "type": "string" 7 | }, 8 | "dataFactoryPrincipalId": { 9 | "type": "string" 10 | } 11 | }, 12 | "variables": { 13 | "role": { 14 | "Owner": "[resourceId('Microsoft.Authorization/roleDefinitions', '8e3af657-a8ff-443c-a75c-2fe8c4bcb635')]", 15 | 
"Contributor": "[resourceId('Microsoft.Authorization/roleDefinitions', 'b24988ac-6180-42a0-ab88-20f7382dd24c')]", 16 | "Reader": "[resourceId('Microsoft.Authorization/roleDefinitions', 'acdd72a7-3385-48ef-bd42-f606fba81ae7')]" 17 | }, 18 | "dataFactoryDatabricksRoleId": "[guid(resourceId('Microsoft.Databricks/workspaces', parameters('databricksName')), variables('role')['Contributor'], parameters('dataFactoryPrincipalId'))]" 19 | }, 20 | "resources": [ 21 | { 22 | "type": "Microsoft.Databricks/workspaces/providers/roleAssignments", 23 | "apiVersion": "2020-04-01-preview", 24 | "name": "[concat(parameters('databricksName'), '/Microsoft.Authorization/', variables('dataFactoryDatabricksRoleId'))]", 25 | "properties": { 26 | "roleDefinitionId": "[variables('role')['Contributor']]", 27 | "principalId": "[parameters('dataFactoryPrincipalId')]" 28 | } 29 | } 30 | ], 31 | "outputs": {} 32 | } -------------------------------------------------------------------------------- /infrastructure-as-code/infrastructure/linkedTemplates/roleAssigments/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "dataLakeName": { 6 | "type": "string" 7 | }, 8 | "dataFactoryPrincipalId": { 9 | "type": "string" 10 | } 11 | }, 12 | "variables": { 13 | "role": { 14 | "Owner": "[resourceId('Microsoft.Authorization/roleDefinitions', '8e3af657-a8ff-443c-a75c-2fe8c4bcb635')]", 15 | "Contributor": "[resourceId('Microsoft.Authorization/roleDefinitions', 'b24988ac-6180-42a0-ab88-20f7382dd24c')]", 16 | "Reader": "[resourceId('Microsoft.Authorization/roleDefinitions', 'acdd72a7-3385-48ef-bd42-f606fba81ae7')]", 17 | "Storage Blob Data Contributor": "[resourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe')]" 18 | }, 19 | "dataFactoryDataLakeRoleId": "[guid(resourceId('Microsoft.Storage/storageAccounts', parameters('dataLakeName')), variables('role')['Storage Blob Data Contributor'], parameters('dataFactoryPrincipalId'))]" 20 | }, 21 | "resources": [ 22 | { 23 | "type": "Microsoft.Storage/storageAccounts/providers/roleAssignments", 24 | "apiVersion": "2020-04-01-preview", 25 | "name": "[concat(parameters('dataLakeName'), '/Microsoft.Authorization/', variables('dataFactoryDataLakeRoleId'))]", 26 | "properties": { 27 | "roleDefinitionId": "[variables('role')['Storage Blob Data Contributor']]", 28 | "principalId": "[parameters('dataFactoryPrincipalId')]" 29 | } 30 | } 31 | ], 32 | "outputs": {} 33 | } -------------------------------------------------------------------------------- /infrastructure-as-code/infrastructure/parameters/parameters.dev.template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environment":{ 6 | "value": "dev" 7 | }, 8 | "solutionName":{ 9 | "value": "" 10 | }, 11 | "location": { 12 | "value": "eastus" 13 | }, 14 | "servicePrincipal": { 15 | "value": "SP--DevTest" 16 | }, 17 | "servicePrincipalSecret": { 18 | "value": "" 19 | }, 20 | "resourceGroupData":{ 21 | "value": "rg--data-dev" 22 | }, 23 | "resourceGroupCompute":{ 24 | "value": "rg--compute-dev" 25 | }, 26 | "resourceGroupMachineLearning":{ 27 | "value": "rg--ml-dev" 28 | }, 29 | "resourceGroupManagedDatabricks":{ 30 | 
"value": "rg--compute-dbw-dev" 31 | }, 32 | "dataLakeSkuName":{ 33 | "value": "Standard_LRS" 34 | }, 35 | "keyVaultSecretsAdminObjectId": { 36 | "value": "c06d0482-8384-4205-a17f-edd2222dce1b" 37 | }, 38 | "dataFactoryAccountName": { 39 | "value": "" 40 | }, 41 | "dataFactoryProjectName": { 42 | "value": "" 43 | }, 44 | "dataFactoryRepositoryName": { 45 | "value": "" 46 | }, 47 | "dataFactoryCollaborationBranch": { 48 | "value": "develop" 49 | }, 50 | "dataFactoryRootFolder": { 51 | "value": "data-platform/adf" 52 | } 53 | } 54 | } -------------------------------------------------------------------------------- /infrastructure-as-code/infrastructure/parameters/parameters.prod.template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environment":{ 6 | "value": "prod" 7 | }, 8 | "solutionName":{ 9 | "value": "" 10 | }, 11 | "location": { 12 | "value": "eastus" 13 | }, 14 | "servicePrincipal": { 15 | "value": "SP--DevTest" 16 | }, 17 | "servicePrincipalSecret": { 18 | "value": "" 19 | }, 20 | "resourceGroupData":{ 21 | "value": "rg--data-prod" 22 | }, 23 | "resourceGroupCompute":{ 24 | "value": "rg--compute-prod" 25 | }, 26 | "resourceGroupMachineLearning":{ 27 | "value": "rg--ml-prod" 28 | }, 29 | "resourceGroupManagedDatabricks":{ 30 | "value": "rg--compute-dbw-prod" 31 | }, 32 | "dataLakeSkuName":{ 33 | "value": "Standard_GRS" 34 | }, 35 | "keyVaultSecretsAdminObjectId": { 36 | "value": "ff1b0094-0e92-4e5b-9f21-502837003e85" 37 | } 38 | } 39 | } -------------------------------------------------------------------------------- /infrastructure-as-code/infrastructure/parameters/parameters.qa.template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environment":{ 6 | "value": "qa" 7 | }, 8 | "solutionName":{ 9 | "value": "" 10 | }, 11 | "location": { 12 | "value": "eastus" 13 | }, 14 | "servicePrincipal": { 15 | "value": "SP--DevTest" 16 | }, 17 | "servicePrincipalSecret": { 18 | "value": "" 19 | }, 20 | "resourceGroupData":{ 21 | "value": "rg--data-qa" 22 | }, 23 | "resourceGroupCompute":{ 24 | "value": "rg--compute-qa" 25 | }, 26 | "resourceGroupMachineLearning":{ 27 | "value": "rg--ml-qa" 28 | }, 29 | "resourceGroupManagedDatabricks":{ 30 | "value": "rg--compute-dbw-qa" 31 | }, 32 | "dataLakeSkuName":{ 33 | "value": "Standard_GRS" 34 | }, 35 | "keyVaultSecretsAdminObjectId": { 36 | "value": "4b0a1c5e-3f7c-4d81-9d2b-479a654b1c70" 37 | } 38 | } 39 | } -------------------------------------------------------------------------------- /infrastructure-as-code/infrastructure/sample-data/AirportCodeLocationLookupClean.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/infrastructure-as-code/infrastructure/sample-data/AirportCodeLocationLookupClean.zip -------------------------------------------------------------------------------- /infrastructure-as-code/infrastructure/sample-data/FlightDelaysWithAirportCodes.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/infrastructure-as-code/infrastructure/sample-data/FlightDelaysWithAirportCodes.zip -------------------------------------------------------------------------------- /infrastructure-as-code/infrastructure/sample-data/FlightWeatherWithAirportCode.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/infrastructure-as-code/infrastructure/sample-data/FlightWeatherWithAirportCode.zip -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/AcceptanceTest.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [Parameter(Mandatory)] [string] $AzureDevOpsPAT, 3 | [Parameter(Mandatory)] [string] $AzureDevOpsOrganization, 4 | [Parameter(Mandatory)] [string] $AzureDevOpsProject, 5 | [Parameter(Mandatory)] [string] $SolutionName, 6 | [Parameter(Mandatory)] [string] $Environment 7 | ) 8 | 9 | Write-Host "Login Azure DevOps Extension" 10 | Write-Output $AzureDevOpsPAT | az devops login 11 | 12 | Write-Host "Set default Azure DevOps organization and project" 13 | az devops configure --defaults organization=$AzureDevOpsOrganization project="$AzureDevOpsProject" 14 | 15 | $GroupName = "dataops-iac-cd-output-$Environment" 16 | $GroupId = $(az pipelines variable-group list --query "[?name=='$GroupName'].id" -o tsv) 17 | 18 | Write-Host "Getting variables from Variable Group $GroupName..." -ForegroundColor Green 19 | $json = $(az pipelines variable-group variable list --group-id $GroupId) 20 | $variables = $json | ConvertFrom-Json -AsHashtable 21 | 22 | Write-Host "Setting environment variables from ARM outputs..." -ForegroundColor Green 23 | foreach ($variable in $variables.GetEnumerator()) { 24 | $key = [regex]::replace($variable.Key, '([A-Z])(.)', { "_" + $args[0] }).ToUpper() #camelCase to SNAKE_CASE 25 | Set-Item "env:ACC_TEST_$key" $variable.Value.value 26 | } 27 | 28 | Write-Host "Filtering environments that should be excluded..." -ForegroundColor Green 29 | $filtered = "dev", "qa", "prod" | Where-Object { $_ -ne $Environment } 30 | 31 | Write-Host "Running acceptance tests..." -ForegroundColor Green 32 | Invoke-Pester -CI -Output Detailed ../infrastructure-as-code/tests/ -ExcludeTagFilter $filtered 33 | -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/DatabricksScopeCreation.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [Parameter(Mandatory)] [string] $KeyVaultName, 3 | [Parameter(Mandatory)] [string] $ComputeResourceGroup, 4 | [Parameter(Mandatory)] [string] $DatabricksName 5 | ) 6 | 7 | Write-Host "Installing Databricks Cli..." -ForegroundColor Green 8 | pip install databricks-cli --upgrade 9 | 10 | Write-Host "Getting Azure resources..." -ForegroundColor Green 11 | $kv = Get-AzKeyVault -VaultName $KeyVaultName 12 | $dbw = Get-AzDatabricksWorkspace -ResourceGroupName $ComputeResourceGroup -Name $DatabricksName 13 | 14 | Write-Host "Creating the Key Vault secret scope on Databricks..." 
-ForegroundColor Green 15 | $accessToken = Get-AzAccessToken -ResourceUrl 2ff814a6-3304-4ab8-85cb-cd0e6f879c1d 16 | $env:DATABRICKS_TOKEN = $accessToken.Token 17 | $env:DATABRICKS_HOST = "https://$($dbw.Url)" 18 | Write-Host "URL DBW https://$($dbw.Url)" 19 | 20 | $scopesList = databricks secrets list-scopes --output json | ConvertFrom-Json 21 | if (! $scopesList.scopes.name -contains "dataops") { 22 | databricks secrets create-scope --scope 'dataops' --scope-backend-type AZURE_KEYVAULT --resource-id $kv.ResourceId --dns-name $kv.VaultUri 23 | } 24 | 25 | Write-Host "Listing Databricks scope content..." -ForegroundColor Green 26 | databricks secrets list --scope dataops 27 | 28 | Write-Host "Finished!" -ForegroundColor Blue -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/Deploy.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [Parameter(Mandatory)] [string] [ValidateSet("dev", "qa", "prod", "sandbox")] $Environment, 3 | [Parameter(Mandatory)] [string] $Version, 4 | [Parameter(Mandatory)] [string] $Location, 5 | [Parameter(Mandatory)] [string] $SolutionName, 6 | [string] $SolutionParametersFile = "./infrastructure-as-code/infrastructure/parameters/parameters.$Environment.json", 7 | [string] $DeploymentOutputFile 8 | ) 9 | 10 | $ErrorActionPreference = "Stop" 11 | 12 | [string] $TemplateSpecRgName = "rg-$SolutionName-template-specs" 13 | 14 | Write-Host "Start deploying $SolutionName for $Version at $Location" -ForegroundColor Green 15 | 16 | Write-Host "Getting template spec $TemplateSpecRgName $Version..." -ForegroundColor Green 17 | $template = Get-AzTemplateSpec -ResourceGroupName $TemplateSpecRgName -Name $SolutionName -Version $Version 18 | 19 | Write-Host "Deploying template..." -ForegroundColor Green 20 | $deployment = New-AzDeployment -Location $Location -TemplateSpecId $template.Versions.Id -Name $Version ` 21 | -TemplateParameterFile $SolutionParametersFile -SkipTemplateParameterPrompt -Verbose 22 | 23 | if ($DeploymentOutputFile) { 24 | Write-Host "Saving outputs to $DeploymentOutputFile..." -ForegroundColor Green 25 | $clustersOutput = @{} 26 | foreach ($output in $Deployment.Outputs.GetEnumerator()) { 27 | $clustersOutput.Add($output.Key, $output.Value.Value) 28 | } 29 | $clustersOutput | ConvertTo-Json | Set-Content -Path $DeploymentOutputFile 30 | } 31 | -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/Lint.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [string] $TtkFolder = "./ttk", 3 | [string] $SolutionTemplateFolder = "./infrastructure-as-code/infrastructure" 4 | ) 5 | 6 | $ErrorActionPreference = "Continue" 7 | 8 | Import-Module "$TtkFolder/arm-ttk/arm-ttk.psd1" 9 | 10 | $testOutput = @(Test-AzTemplate -TemplatePath $SolutionTemplateFolder) 11 | $testOutput 12 | 13 | if ($testOutput | ? { $_.Errors }) { 14 | Write-Host "##vso[task.logissue type=warning;]Linter has found some problems." 
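# Note: the "##vso[task.logissue type=warning;]" prefix makes Azure DevOps surface the message as a
# pipeline warning; together with $ErrorActionPreference = "Continue" above, linter findings are
# reported without failing the build step.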
15 | } 16 | -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/Plan.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [Parameter(Mandatory)] [string] [ValidateSet("dev", "qa", "prod", "sandbox")] $Environment, 3 | [Parameter(Mandatory)] [string] $Version, 4 | [Parameter(Mandatory)] [string] $Location, 5 | [Parameter(Mandatory)] [string] $SolutionName, 6 | [string] $SolutionTemplateFile = "./infrastructure-as-code/infrastructure/azuredeploy.json", 7 | [string] $SolutionParametersFile = "./infrastructure-as-code/infrastructure/parameters/parameters.$Environment.json", 8 | [string] $VersionDescription = "", 9 | [string] $VersionBuildId = "", 10 | [string] $VersionAuthor = "" 11 | ) 12 | 13 | [string] $TemplateSpecRgName = "rg-$SolutionName-template-specs" 14 | 15 | Write-Host "Start planning $SolutionName ($Version) on $Environment environment at $Location" -ForegroundColor Green 16 | 17 | Write-Host "Getting resource group $TemplateSpecRgName" -ForegroundColor Green 18 | $rg = Get-AzResourceGroup -Name $TemplateSpecRgName 19 | 20 | if ($Environment -eq "prod"){ 21 | Write-Host "Checking if template spec $TemplateSpecRgName $Version already exists..." -ForegroundColor Green 22 | $template = Get-AzTemplateSpec -ResourceGroupName $TemplateSpecRgName -Name $SolutionName -Version $Version -ErrorAction SilentlyContinue 23 | } 24 | 25 | if (!$template) { 26 | Write-Host "Publishing template spec..." -ForegroundColor Green 27 | 28 | $metadata = @{ 29 | BuildId = $VersionBuildId 30 | Author = $VersionAuthor 31 | } 32 | 33 | $template = New-AzTemplateSpec -Name $SolutionName -Version $Version -ResourceGroupName $TemplateSpecRgName -Location $rg.Location -TemplateFile $SolutionTemplateFile -VersionDescription "$VersionDescription" -Tag $metadata -Force 34 | } 35 | 36 | Write-Host "Previewing template deployment changes..." -ForegroundColor Green 37 | New-AzDeployment -Location $Location -TemplateSpecId $template.Versions.Id -Name $Version ` 38 | -TemplateParameterFile $SolutionParametersFile -SkipTemplateParameterPrompt -WhatIf -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/PublishOutputs.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [Parameter(Mandatory)] [string] $AzureDevOpsPAT, 3 | [Parameter(Mandatory)] [string] $AzureDevOpsOrganization, 4 | [Parameter(Mandatory)] [string] $AzureDevOpsProject, 5 | [Parameter(Mandatory)] [string] $GroupName, 6 | [Parameter(Mandatory)] [string] $DeploymentOutputFile 7 | ) 8 | 9 | Write-Host "Login Azure DevOps Extension" 10 | Write-Output $AzureDevOpsPAT | az devops login 11 | 12 | Write-Host "Set default Azure DevOps organization and project" 13 | az devops configure --defaults organization=$AzureDevOpsOrganization project="$AzureDevOpsProject" 14 | 15 | $GroupId = $(az pipelines variable-group list --query "[?name=='$GroupName'].id" -o tsv) 16 | 17 | if (! $GroupId) { 18 | Write-Host "Creating variable group $GroupName ..." 19 | $GroupId = $(az pipelines variable-group create --name $GroupName --authorize --variable createdAt="$(Get-Date)" --query "id" -o tsv) 20 | 21 | if (! $GroupId) { 22 | Write-Error "The build agent does not have permissions to create variable groups" 23 | exit 1 24 | } 25 | } 26 | 27 | Write-Host "Getting variables from $DeploymentOutputFile file..." 
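# The deployment output file is the flat JSON map written by Deploy.ps1 (ARM output name -> value),
# e.g. (illustrative values only): { "keyVaultName": "kv-dataops-dev", "resourceGroupData": "rg-dataops-data-dev" }.
# Each entry is then pushed into the variable group below, updating the variable when it already exists.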
28 | $DeploymentOutput = Get-Content -Path $DeploymentOutputFile | ConvertFrom-Json -AsHashtable 29 | 30 | Write-Host "Setting Variable Group variables from ARM outputs..." 31 | foreach ($output in $DeploymentOutput.GetEnumerator()) { 32 | $name = $output.Key 33 | $value = $output.Value 34 | 35 | Write-Host "Trying to update variable $key..." 36 | if (! (az pipelines variable-group variable update --group-id $GroupId --name $name --value $value)) { 37 | Write-Host "Creating variable $key..." 38 | az pipelines variable-group variable create --group-id $GroupId --name $name --value $value 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/Sandbox.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [string] $Location = "eastus", 3 | [string] $SolutionName = "dataops", 4 | [switch] $AutoApprove, 5 | [switch] $Test, 6 | [switch] $Lint 7 | ) 8 | 9 | $ErrorActionPreference = "Stop" 10 | function Get-Confirmation() { 11 | if (!$AutoApprove) { 12 | $context = Get-AzContext 13 | $confirmation = Read-Host "Are you sure you want to deploy sandbox resources on the subscription $($context.Subscription.Name) ($($context.Subscription.Id))? (Y/n)" 14 | if ($confirmation -ne 'Y') { 15 | Write-Error "Setup cancelled." 16 | } 17 | } 18 | } 19 | 20 | $environment = "sandbox" 21 | $version = git branch --show-current | % { $_.replace("/", "-") } 22 | $outputFile = "sandbox.json" 23 | 24 | if ($Lint) { 25 | Write-Host "Running linter" -ForegroundColor Blue 26 | & $PSScriptRoot\Lint.ps1 27 | } 28 | 29 | Get-Confirmation 30 | Write-Host "Starting planning for $environment" -ForegroundColor Blue 31 | & $PSScriptRoot\Plan.ps1 -Environment $environment -Version $version -Location $Location -SolutionName $SolutionName 32 | 33 | Get-Confirmation 34 | Write-Host "Starting deploying ARM to $environment" -ForegroundColor Blue 35 | & $PSScriptRoot\Deploy.ps1 -Environment $environment -Version $version -Location $Location -SolutionName $SolutionName -DeploymentOutputFile $outputFile 36 | 37 | Write-Host "Setting environment variables from ARM outputs..." 
-ForegroundColor Green 38 | $deploymentOutput = Get-Content -Path $outputFile | ConvertFrom-Json -AsHashtable 39 | foreach ($output in $deploymentOutput.GetEnumerator()) { 40 | $key = [regex]::replace($output.Key, '([A-Z])(.)', { "_" + $args[0]}).ToUpper() #camelCase to SNAKE_CASE 41 | Set-Item "env:ACC_TEST_$key" $output.Value 42 | } 43 | Remove-Item $outputFile 44 | 45 | Write-Host "Starting deploying DBW clusters to $environment" -ForegroundColor Blue 46 | & $PSScriptRoot\DatabricksClusters.ps1 -Environment $environment -DatabricksWorkspaceHost "https://$($deploymentOutput.databricksWorkspaceUrl)/" 47 | 48 | if ($Test) { 49 | Write-Host "Running acceptance tests for $environment" -ForegroundColor Blue 50 | Invoke-Pester -Output Detailed ./tests/ -ExcludeTagFilter "prod" 51 | } 52 | 53 | Write-Host "Finished" -ForegroundColor Blue 54 | -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/Setup.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [Parameter(Mandatory)] [string] [ValidateSet("dev", "qa", "prod", "sandbox")] $Environment, 3 | [string] $Location = "eastus", 4 | [string] $SolutionName = "dataops", 5 | [string] [ValidateLength(1, 4)] $SandboxUniqueName 6 | ) 7 | 8 | $ErrorActionPreference = "Stop" 9 | 10 | if ($Environment -eq "sandbox" -and ! $SandboxUniqueName) { 11 | Write-Error "When creating a sandbox environment, please set -SandboxUniqueName" 12 | } 13 | 14 | $context = Get-AzContext 15 | 16 | $confirmation = Read-Host "Are you sure you want to create a $Environment environment on the subscription $($context.Subscription.Name) ($($context.Subscription.Id))? (Y/n)" 17 | if ($confirmation -ne 'Y') { 18 | Write-Error "Setup cancelled. If you intend to switch subscriptions, please use 'Set-AzContext -Subscription '" 19 | } 20 | 21 | Write-Host "Starting creation of $SolutionName RGs for $Environment" -ForegroundColor Blue 22 | 23 | Write-Host "Creating resource group for data..." -ForegroundColor Green 24 | New-AzResourceGroup -Name "rg-$SolutionName-data-$Environment" -Location $Location -Force 25 | 26 | Write-Host "Creating resource group for compute..." -ForegroundColor Green 27 | New-AzResourceGroup -Name "rg-$SolutionName-compute-$Environment" -Location $Location -Force 28 | 29 | Write-Host "Creating resource group for machine learning..." -ForegroundColor Green 30 | New-AzResourceGroup -Name "rg-$SolutionName-ml-$Environment" -Location $Location -Force 31 | 32 | Write-Host "Creating resource group for network..." -ForegroundColor Green 33 | New-AzResourceGroup -Name "rg-$SolutionName-network-$Environment" -Location $Location -Force 34 | 35 | Write-Host "Creating resource group for template specs..." -ForegroundColor Green 36 | New-AzResourceGroup -Name "rg-dataops-template-specs" -Location $Location -Force 37 | 38 | Write-Host "Registering resource providers..." -ForegroundColor Green 39 | $Providers = "Microsoft.Storage", "Microsoft.Compute", "Microsoft.MachineLearningServices", "Microsoft.ContainerRegistry", ` 40 | "Microsoft.Databricks", "Microsoft.ContainerService", "Microsoft.Kubernetes", "Microsoft.KubernetesConfiguration", ` 41 | "Microsoft.KeyVault", "Microsoft.Insights", "Microsoft.DataFactory", "Microsoft.DataLakeStore" 42 | $Providers | Register-AzResourceProvider -ProviderNamespace { $_ } 43 | 44 | if ($Environment -eq "sandbox") { 45 | Write-Host "Creating Sandbox parameters file..." 
-ForegroundColor Green 46 | $devParamsFile = ".\infrastructure\parameters\parameters.dev.json" 47 | $sandboxParamsFile = ".\infrastructure\parameters\parameters.sandbox.json" 48 | $sandboxParams = Get-Content -Path $devParamsFile -Raw 49 | $sandboxParams = $sandboxParams.Replace('-dev', '-sandbox') 50 | $sandboxParams = $sandboxParams.Replace('"value": "dev"', "`"value`": `"$SandboxUniqueName`"") 51 | $sandboxParams | Set-Content -Path $sandboxParamsFile 52 | 53 | Write-Host "Downloading ARM Template Test Toolkit..." -ForegroundColor Green 54 | New-Item './ttk' -ItemType Directory -Force 55 | Invoke-WebRequest -Uri 'https://aka.ms/arm-ttk-latest' -OutFile './ttk/arm-ttk.zip' -Verbose 56 | Expand-Archive -Path './ttk/*.zip' -DestinationPath './ttk' -Verbose -Force 57 | } 58 | 59 | Write-Host "Finished" -ForegroundColor Blue 60 | -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/UpdateKeyVaultSecrets.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [Parameter(Mandatory)] [string] $DeploymentOutputFile 3 | ) 4 | 5 | Write-Host "Getting variables from $DeploymentOutputFile file..." 6 | $DeploymentOutput = Get-Content -Path $DeploymentOutputFile | ConvertFrom-Json -AsHashtable 7 | $ResourceGroupName = $DeploymentOutput["resourceGroupData"] 8 | $StorageAccountName = $DeploymentOutput["dataSourceStorageAccountName"] 9 | $keyVaultName = $DeploymentOutput["keyVaultName"] 10 | 11 | $ErrorActionPreference = "Stop" 12 | 13 | $context = Get-AzContext 14 | Write-Host "Getting Service Principal information..." -ForegroundColor Green 15 | $servicePrincipal = Get-AzADServicePrincipal -ApplicationId $context.Account.Id 16 | 17 | Write-Host "Reading the Key Vault..." -ForegroundColor Green 18 | $kv = Get-AzKeyVault -VaultName $KeyVaultName 19 | 20 | Write-Host "Adding permissions to user on Key Vault..." -ForegroundColor Green 21 | $userPermissions = $kv.AccessPolicies | Where-Object { $_.ObjectId -eq $servicePrincipal.Id } 22 | $secretPermissions = $userPermissions.PermissionsToSecrets 23 | if (! $secretPermissions || ! $userPermissions.PermissionsToSecrets.Contains("set")) { 24 | Set-AzKeyVaultAccessPolicy -VaultName $KeyVaultName -ObjectId $servicePrincipal.Id -PermissionsToSecrets "set" 25 | } 26 | 27 | Write-Host "Add the Key Vault Secret..." 28 | $Key1 = (Get-AzStorageAccountKey -ResourceGroupName $ResourceGroupName -Name $StorageAccountName).Value[0] 29 | $ConnectionString = "DefaultEndpointsProtocol=https;AccountName=$StorageAccountName;AccountKey=$Key1;EndpointSuffix=core.windows.net" 30 | 31 | Set-AzKeyVaultSecret -VaultName $keyVaultName -Name "StorageAccountConnectionString" -SecretValue $(ConvertTo-SecureString $ConnectionString -AsPlainText) 32 | -------------------------------------------------------------------------------- /infrastructure-as-code/scripts/UploadSampleData.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [Parameter(Mandatory)] [string] $DeploymentOutputFile 3 | ) 4 | 5 | Write-Host "Unzip sample-data..." 
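# The three sample .zip archives under ./infrastructure-as-code/infrastructure/sample-data are expanded
# into ./sample-data, and the resulting CSV files are uploaded to the "flights-data" container of the
# data-source storage account named in the deployment output file.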
6 | mkdir ./sample-data 7 | 8 | $zipFiles = ('FlightWeatherWithAirportCode.zip', 'FlightDelaysWithAirportCodes.zip', 'AirportCodeLocationLookupClean.zip') 9 | $zipPath = './infrastructure-as-code/infrastructure/sample-data/' 10 | 11 | foreach ($zipFile in $zipFiles){ 12 | $file = ($zipPath + $zipFile) 13 | Write-Host "File " $file 14 | Expand-Archive -path $file -destinationpath './sample-data/' 15 | } 16 | 17 | Write-Host "Getting variables from $DeploymentOutputFile file..." 18 | $DeploymentOutput = Get-Content -Path $DeploymentOutputFile | ConvertFrom-Json -AsHashtable 19 | 20 | Write-Host "Uploading files..." 21 | $ResourceGroupName = $DeploymentOutput["resourceGroupData"] 22 | $StorageAccountName = $DeploymentOutput["dataSourceStorageAccountName"] 23 | 24 | 25 | $uploadStorage = Get-AzStorageAccount -ResourceGroupName $ResourceGroupName -Name $StorageAccountName 26 | $ContainerName = "flights-data" 27 | 28 | Set-AzStorageBlobContent -Container $ContainerName -File ./sample-data/AirportCodeLocationLookupClean.csv -Context $uploadStorage.Context -Force 29 | Set-AzStorageBlobContent -Container $ContainerName -File ./sample-data/FlightDelaysWithAirportCodes.csv -Context $uploadStorage.Context -Force 30 | Set-AzStorageBlobContent -Container $ContainerName -File ./sample-data/FlightWeatherWithAirportCode.csv -Context $uploadStorage.Context -Force 31 | 32 | Write-Host "Files uploaded!" -------------------------------------------------------------------------------- /infrastructure-as-code/tests/Compute/DataFactory.Tests.ps1: -------------------------------------------------------------------------------- 1 | BeforeAll { 2 | $rgName = $env:ACC_TEST_RESOURCE_GROUP_COMPUTE 3 | $dataFactoryName = $env:ACC_TEST_DATA_FACTORY_NAME 4 | $location = $env:ACC_TEST_LOCATION 5 | } 6 | 7 | Describe "Data Factory" -Tag "Acceptance" { 8 | BeforeAll { 9 | $dataFactory = Get-AzDataFactoryV2 -ResourceGroupName $rgName -Name $dataFactoryName 10 | } 11 | Context "Resource" { 12 | It "Exists" { 13 | $dataFactory | Should -Not -BeNullOrEmpty 14 | } 15 | It "ProvisioningState Is Succeeded" { 16 | $dataFactory.ProvisioningState | Should -Be "Succeeded" 17 | } 18 | It "Is In Expected Location" { 19 | $dataFactory.Location | Should -Be $location 20 | } 21 | } 22 | Context "Source Control" { 23 | It "Is Using Repo Configuration" -Tag "dev" { 24 | $dataFactory.RepoConfiguration | Should -Not -BeNullOrEmpty 25 | $dataFactory.RepoConfiguration | Should -BeOfType Microsoft.Azure.Management.DataFactory.Models.FactoryVSTSConfiguration 26 | $dataFactory.RepoConfiguration.TenantId | Should -Not -BeNullOrEmpty 27 | $dataFactory.RepoConfiguration.AccountName | Should -Not -BeNullOrEmpty 28 | $dataFactory.RepoConfiguration.ProjectName | Should -Not -BeNullOrEmpty 29 | $dataFactory.RepoConfiguration.RepositoryName | Should -Not -BeNullOrEmpty 30 | $dataFactory.RepoConfiguration.CollaborationBranch | Should -Not -BeNullOrEmpty 31 | $dataFactory.RepoConfiguration.RootFolder | Should -Not -BeNullOrEmpty 32 | } 33 | It "Is Not Using Repo Configuration" -Tag "prod" { 34 | $dataFactory.RepoConfiguration | Should -BeNullOrEmpty 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /infrastructure-as-code/tests/Compute/Databricks.Tests.ps1: -------------------------------------------------------------------------------- 1 | BeforeAll { 2 | $rgName = $env:ACC_TEST_RESOURCE_GROUP_COMPUTE 3 | $databricksName = $env:ACC_TEST_DATABRICKS_NAME 4 | $location = $env:ACC_TEST_LOCATION 5 | } 6 | 7 
| Describe "Databricks" -Tag "Acceptance" { 8 | BeforeAll { 9 | $databricks = Get-AzDatabricksWorkspace -ResourceGroupName $rgName -Name $databricksName 10 | } 11 | Context "Resource" { 12 | It "Exists" { 13 | $databricks | Should -Not -BeNullOrEmpty 14 | } 15 | It "ProvisioningState Is Succeeded" { 16 | $databricks.ProvisioningState | Should -Be "Succeeded" 17 | } 18 | It "Is In Expected Location" { 19 | $databricks.Location | Should -Be $location 20 | } 21 | } 22 | Context "SKU" { 23 | It "Is Premium" { 24 | $databricks.SkuName | Should -Be "premium" 25 | } 26 | } 27 | Context "Network" -Skip { 28 | It "Does Not Enable Public IP" { 29 | $databricks.EnableNoPublicIPValue | Should -BeFalse 30 | } 31 | It "Lives In Custom VNET" { 32 | $databricks.CustomVirtualNetworkIdValue | Should -Not -BeNullOrEmpty 33 | $databricks.CustomPrivateSubnetNameValue | Should -Not -BeNullOrEmpty 34 | $databricks.CustomPublicSubnetNameValue | Should -Not -BeNullOrEmpty 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /infrastructure-as-code/tests/Compute/KeyVault.Tests.ps1: -------------------------------------------------------------------------------- 1 | BeforeAll { 2 | $rgName = $env:ACC_TEST_RESOURCE_GROUP_COMPUTE 3 | $keyVaultName = $env:ACC_TEST_KEY_VAULT_NAME 4 | $location = $env:ACC_TEST_LOCATION 5 | } 6 | 7 | Describe "Key Vault" -Tag "Acceptance" { 8 | BeforeAll { 9 | $keyVault = Get-AzKeyVault -ResourceGroupName $rgName -Name $keyVaultName 10 | } 11 | Context "Resource" { 12 | It "Exists" { 13 | $keyVault | Should -Not -BeNullOrEmpty 14 | } 15 | It "Is In Expected Location" { 16 | $keyVault.Location | Should -Be $location 17 | } 18 | } 19 | Context "SKU" { 20 | It "Is Standard" { 21 | $keyVault.Sku | Should -Be "Standard" 22 | } 23 | } 24 | Context "Delete Protection" { 25 | It "Enables Purge Protection" { 26 | $keyVault.EnablePurgeProtection | Should -BeTrue 27 | } 28 | It "Enables Soft Delete" { 29 | $keyVault.EnableSoftDelete | Should -BeTrue 30 | } 31 | } 32 | Context "Usage Flags" { 33 | It "Is Not Enabled For Disk Encryption" { 34 | $keyVault.EnabledForDiskEncryption | Should -BeFalse 35 | } 36 | It "Is Not Enabled For Deployment" { 37 | $keyVault.EnabledForDeployment | Should -BeFalse 38 | } 39 | It "Is Not Enabled For Template Deployment" { 40 | $keyVault.EnabledForTemplateDeployment | Should -BeFalse 41 | } 42 | } 43 | Context "Authorization" { 44 | It "Uses Access Policy Authorization Model" { 45 | $keyVault.EnableRbacAuthorization | Should -BeFalse 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /infrastructure-as-code/tests/Compute/ResourceGroup.Tests.ps1: -------------------------------------------------------------------------------- 1 | BeforeAll { 2 | $rgName = $env:ACC_TEST_RESOURCE_GROUP_COMPUTE 3 | $location = $env:ACC_TEST_LOCATION 4 | } 5 | 6 | Describe "Resource Group Compute" -Tag "Acceptance" { 7 | BeforeAll { 8 | $rg = Get-AzResourceGroup -Name $rgName 9 | $rgLocks = Get-AzResourceLock -ResourceGroupName $rgName -AtScope 10 | } 11 | Context "Resource" { 12 | It "Exists" { 13 | $rg | Should -Not -BeNullOrEmpty 14 | } 15 | It "ProvisioningState Is Succeeded" { 16 | $rg.ProvisioningState | Should -Be "Succeeded" 17 | } 18 | It "Is In Expected Location" { 19 | $rg.Location | Should -Be $location 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /infrastructure-as-code/tests/Compute/RoleAssigments.Tests.ps1: 
-------------------------------------------------------------------------------- 1 | BeforeAll { 2 | $rgComputeName = $env:ACC_TEST_RESOURCE_GROUP_COMPUTE 3 | $rgDataName = $env:ACC_TEST_RESOURCE_GROUP_DATA 4 | $keyVaultName = $env:ACC_TEST_KEY_VAULT_NAME 5 | $dataFactoryName = $env:ACC_TEST_DATA_FACTORY_NAME 6 | $databricksName = $env:ACC_TEST_DATABRICKS_NAME 7 | $dataLakeName = $env:ACC_TEST_DATA_LAKE_NAME 8 | } 9 | 10 | Describe "Role Assigments" -Tag "Acceptance" { 11 | BeforeAll { 12 | $keyVault = Get-AzKeyVault -ResourceGroupName $rgComputeName -Name $keyVaultName 13 | $dataFactory = Get-AzDataFactoryV2 -ResourceGroupName $rgComputeName -Name $dataFactoryName 14 | $dataLake = Get-AzStorageAccount -ResourceGroupName $rgDataName -Name $dataLakeName 15 | $databricks = Get-AzDatabricksWorkspace -ResourceGroupName $rgComputeName -Name $databricksName 16 | 17 | # TODO: Service Principal needs permission to read AD Objects ID. 18 | # $dataLakeRoles = Get-AzRoleAssignment -Scope $dataLake.Id 19 | # $databricksRoles = Get-AzRoleAssignment -Scope $databricks.Id 20 | } 21 | Context "Key Vault" { 22 | It "Databricks" { 23 | $objectId = 'fe597bb2-377c-44f1-8515-82c8a1a62e3d' 24 | $policy = $keyVault.AccessPolicies | Where-Object { $_.ObjectId -eq $objectId } 25 | $policy.PermissionsToSecrets | Should -Contain 'get' 26 | $policy.PermissionsToSecrets | Should -Contain 'list' 27 | } 28 | It "Data Factory" { 29 | $objectId = $dataFactory.Identity.PrincipalId 30 | $policy = $keyVault.AccessPolicies | Where-Object { $_.ObjectId -eq $objectId } 31 | $policy.PermissionsToSecrets | Should -Contain 'get' 32 | $policy.PermissionsToSecrets | Should -Contain 'list' 33 | } 34 | } 35 | Context "Data Lake" -Skip { 36 | It "Data Factory" { 37 | $objectId = $dataFactory.Identity.PrincipalId 38 | $role = $dataLakeRoles | Where-Object { $_.ObjectId -eq $objectId } 39 | $role.RoleDefinitionName | Should -Be 'Storage Blob Data Contributor' 40 | } 41 | } 42 | Context "Databricks" -Skip { 43 | It "Data Factory" { 44 | $objectId = $dataFactory.Identity.PrincipalId 45 | $role = $databricksRoles | Where-Object { $_.ObjectId -eq $objectId } 46 | $role.RoleDefinitionName | Should -Be 'Contributor' 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /infrastructure-as-code/tests/Data/DataLake.Tests.ps1: -------------------------------------------------------------------------------- 1 | BeforeAll { 2 | $rgName = $env:ACC_TEST_RESOURCE_GROUP_DATA 3 | $dataLakeName = $env:ACC_TEST_DATA_LAKE_NAME 4 | $location = $env:ACC_TEST_LOCATION 5 | $dataLakeSkuName = $env:ACC_TEST_DATA_LAKE_SKU_NAME 6 | 7 | $vnet = $env:ACC_TEST_VNET 8 | $snet_storage = $env:ACC_TEST_SNET_STORAGE 9 | $snet_dbricks_public = $env:ACC_TEST_SNET_DATABRICKS_PUBLIC 10 | $snet_dbricks_private = $env:ACC_TEST_SNET_DATABRICKS_PRIVATE 11 | } 12 | Describe "Data Lake" -Tag "Acceptance" { 13 | BeforeAll { 14 | $dataLake = Get-AzStorageAccount -ResourceGroupName $rgName -Name $dataLakeName 15 | $containers = Get-AzStorageContainer -Context $dataLake.Context 16 | } 17 | Context "Resource" { 18 | # TODO: For some reason the following test is always failing, even all the other ones pass. 
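# Because of that, the "Exists" assertion below is marked -Skip; the remaining assertions in this file
# still run against the $dataLake and $containers objects resolved in the BeforeAll block above.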
19 | It "Exists" -Skip { 20 | $dataLake | Should -Not -BeNullOrEmpty 21 | } 22 | It "ProvisioningState Is Succeeded" { 23 | $dataLake.ProvisioningState | Should -Be "Succeeded" 24 | } 25 | It "Is In Expected Location" { 26 | $dataLake.Location | Should -Be $location 27 | } 28 | } 29 | Context "Features"{ 30 | It "Enables Hierarchical Namespace" { 31 | $dataLake.EnableHierarchicalNamespace | Should -BeTrue 32 | } 33 | } 34 | Context "SKU"{ 35 | It "Tier Is Standard" { 36 | $dataLake.Sku.Tier | Should -Be "Standard" 37 | } 38 | It "Name Matches" { 39 | $dataLake.Sku.Name | Should -Be $dataLakeSkuName 40 | } 41 | It "Kind Is StorageV2" { 42 | $dataLake.kind | Should -Be "StorageV2" 43 | } 44 | It "Access Tier Is Hot" { 45 | $dataLake.AccessTier| Should -Be "Hot" 46 | } 47 | } 48 | Context "Containers"{ 49 | It "Contains Landing" { 50 | $containers.Name | Should -Contain "landing" 51 | } 52 | It "Contains Refined" { 53 | $containers.Name | Should -Contain "refined" 54 | } 55 | It "Contains Trusted" { 56 | $containers.Name | Should -Contain "trusted" 57 | } 58 | It "Are All Private" { 59 | $containers.Permission.PublicAccess | Where-Object { $_ -eq 'Off' } | Should -HaveCount $containers.Count 60 | } 61 | } 62 | Context "Network" -Skip { 63 | It "Is allowed on $snet_dbricks_public" { 64 | [bool]($dataLake.properties.networkAcls.virtualNetworkRules | Where-Object id -Like "*$vnet/subnets/$snet_dbricks_public" || state -eq "Succeeded") | Should -Be true 65 | } 66 | It "Is allowed on $snet_dbricks_private" { 67 | [bool]($dataLake.properties.networkAcls.virtualNetworkRules | Where-Object id -Like "*$vnet/subnets/$snet_dbricks_private" || state -eq "Succeeded") | Should -Be true 68 | } 69 | It "Is allowed on $snet_storage" { 70 | [bool]($dataLake.properties.networkAcls.virtualNetworkRules | Where-Object id -Like "*$vnet/subnets/$snet_storage" || state -eq "Succeeded") | Should -Be true 71 | } 72 | It "IPRules Contains 0 Elements" { 73 | $dataLake.NetworkRuleSet.IpRules | Should -HaveCount 0 74 | } 75 | } 76 | Context "Security"{ 77 | It "Enables Blob Encryption" { 78 | $dataLake.Encryption.Services.Blob.Enabled | Should -Be true 79 | } 80 | It "Enables Files Encryption" { 81 | $dataLake.Encryption.Services.File.Enabled | Should -Be true 82 | } 83 | It "Enables HTTPS Traffic Only" { 84 | $dataLake.EnableHttpsTrafficOnly | Should -Be true 85 | } 86 | It "Enables TLS 1.2" { 87 | $dataLake.MinimumTlsVersion | Should -Be 'TLS1_2' 88 | } 89 | } 90 | } 91 | 92 | -------------------------------------------------------------------------------- /infrastructure-as-code/tests/Data/ResourceGroup.Tests.ps1: -------------------------------------------------------------------------------- 1 | BeforeAll { 2 | $rgName = $env:ACC_TEST_RESOURCE_GROUP_DATA 3 | $location = $env:ACC_TEST_LOCATION 4 | } 5 | 6 | Describe "Resource Group Data" -Tag "Acceptance" { 7 | BeforeAll { 8 | $rg = Get-AzResourceGroup -Name $rgName 9 | $rgLocks = Get-AzResourceLock -ResourceGroupName $rgName -AtScope 10 | } 11 | Context "Resource" { 12 | It "Exists" { 13 | $rg | Should -Not -BeNullOrEmpty 14 | } 15 | It "ProvisioningState Is Succeeded" { 16 | $rg.ProvisioningState | Should -Be "Succeeded" 17 | } 18 | It "Is In Expected Location" { 19 | $rg.Location | Should -Be $location 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /quickstart/.gitignore: -------------------------------------------------------------------------------- 1 | config.dev.json 
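A brief aside on the Pester suites above: every test resolves its inputs from `ACC_TEST_*` environment variables, which `AcceptanceTest.ps1` and `Sandbox.ps1` normally populate from the ARM deployment outputs. A minimal local sketch, run from the repo root with purely hypothetical resource names (only a subset of the `ACC_TEST_*` variables is shown), might look like:

```powershell
# Hypothetical values for illustration; in the pipeline these come from the ARM deployment outputs.
$env:ACC_TEST_RESOURCE_GROUP_DATA    = "rg-dataops-data-dev"
$env:ACC_TEST_RESOURCE_GROUP_COMPUTE = "rg-dataops-compute-dev"
$env:ACC_TEST_DATA_LAKE_NAME         = "lakedataopsdev"
$env:ACC_TEST_DATA_LAKE_SKU_NAME     = "Standard_LRS"
$env:ACC_TEST_LOCATION               = "eastus"

# Mirror AcceptanceTest.ps1: run everything except the tags of the other environments.
Invoke-Pester -Output Detailed ./infrastructure-as-code/tests/ -ExcludeTagFilter "qa", "prod"
```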
-------------------------------------------------------------------------------- /quickstart/configs/cloud-setup/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0.0", 3 | "project": { 4 | "name": "", 5 | "alias": "", 6 | "location": "eastus" 7 | }, 8 | "servicePrincipals": [ 9 | "SP--DevTest" 10 | ], 11 | "azureDevOps": { 12 | "organization": "", 13 | "project": "" 14 | }, 15 | "environments": { 16 | "dev": { 17 | "subscriptionId": "", 18 | "servicePrincipalName": "SP--DevTest", 19 | "serviceConnectionName": "spn-iac-dev" 20 | }, 21 | "qa": { 22 | "subscriptionId": "", 23 | "servicePrincipalName": "SP--DevTest", 24 | "serviceConnectionName": "spn-iac-qa" 25 | }, 26 | "prod": { 27 | "subscriptionId": "", 28 | "servicePrincipalName": "SP--DevTest", 29 | "serviceConnectionName": "spn-iac-prod" 30 | } 31 | }, 32 | "output": [ 33 | { 34 | "template": "quickstart/configs/dataops/template.json", 35 | "file": "quickstart/outputs/hol.json" 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /quickstart/configs/dataops/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "Project": { 3 | "Name": "", 4 | "Alias": "", 5 | "ServicePrincipalSecret": "" 6 | }, 7 | "RepoConfiguration": { 8 | "RepoName": "", 9 | "AzureDevOpsOrganization": "", 10 | "AzureDevOpsOrganizationURI": "https://dev.azure.com/", 11 | "AzureDevOpsProject": "", 12 | "TemplateGitUrl": "https://github.com/microsoft/devsquad-in-a-day.git", 13 | "MinimumApprovers": "1", 14 | "Pipelines": [ 15 | { 16 | "Name": "-iac-ci", 17 | "SourceYamlPath": "azure-pipelines/iac/iac-ci.yml", 18 | "BuildPolicy": { 19 | "Name": "-iac-ci build policy", 20 | "PathFilter": "/infrastructure-as-code/**" 21 | } 22 | }, 23 | { 24 | "Name": "-iac-cd", 25 | "SourceYamlPath": "azure-pipelines/iac/iac-cd.yml" 26 | }, 27 | { 28 | "Name": "-lib-ci", 29 | "SourceYamlPath": "azure-pipelines/lib/lib-ci.yml", 30 | "BuildPolicy": { 31 | "Name": "-lib-ci build policy", 32 | "PathFilter": "/data-platform/src/dataopslib/dataopslib/*" 33 | } 34 | }, 35 | { 36 | "Name": "-lib-cd", 37 | "SourceYamlPath": "azure-pipelines/lib/lib-cd.yml" 38 | }, 39 | { 40 | "Name": "-databricks-ci", 41 | "SourceYamlPath": "azure-pipelines/databricks/databricks-ci.yml", 42 | "BuildPolicy": { 43 | "Name": "-databricks-ci build policy", 44 | "PathFilter": "/data-platform/notebooks/*" 45 | } 46 | }, 47 | { 48 | "Name": "-databricks-cd", 49 | "SourceYamlPath": "azure-pipelines/databricks/databricks-notebooks-cd.yml" 50 | }, 51 | { 52 | "Name": "-databricks-lib-cd", 53 | "SourceYamlPath": "azure-pipelines/databricks/databricks-lib-cd.yml" 54 | }, 55 | { 56 | "Name": "-adf-cd", 57 | "SourceYamlPath": "azure-pipelines/adf/adf-cd.yml" 58 | } 59 | ] 60 | } 61 | } -------------------------------------------------------------------------------- /quickstart/docs/0b-prerequisites-advanced.md: -------------------------------------------------------------------------------- 1 | ### PowerShell 2 | 3 | 1. The lab requires **PowerShell 7.1** with [PowerShell Core](https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/?view=powershell-7.1) module, which can be installed either on Windows or Linux. 
4 | 5 | - If you have a preference to run PowerShell on Windows, follow the [Installing PowerShell on Windows](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell-core-on-windows?view=powershell-7.1) instructions. 6 | - Otherwise, follow the [Installing PowerShell on Linux](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell-core-on-linux?view=powershell-7.1) instructions. 7 | 8 | > If you installed PowerShell on Linux, make sure to start it by running the `pwsh` command on your terminal. 9 | 10 | 2. Install the [Azure Az PowerShell module](https://docs.microsoft.com/en-us/powershell/azure/install-az-ps?view=azps-6.2.0). 11 | 12 | ### Azure CLI 13 | 14 | 1. Install the [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). 15 | 16 | 2. Install the [Azure DevOps CLI](https://docs.microsoft.com/en-us/azure/devops/cli/?view=azure-devops). 17 | 18 | ### Other Tools 19 | 20 | 1. Install the [Databricks CLI](https://docs.microsoft.com/en-us/azure/databricks/dev-tools/cli/#install-the-cli). -------------------------------------------------------------------------------- /quickstart/docs/1b-create-hol-setup-file-advanced.md: -------------------------------------------------------------------------------- 1 | # Setup the Configuration File 2 | 3 | 1. Open the terminal, clone the source code of the lab and go to the `hol` directory. 4 | 5 | ```bash 6 | # URL Pattern: https://:@dev.azure.com///_git/ 7 | git clone https://holsetup:2je7narfoc2rusvewdjpfnlcn3pyponyrpsko3w5b6z26zj4wpoa@dev.azure.com/csu-devsquad/advworks-dataops/_git/hol 8 | cd hol 9 | ``` 10 | 11 | 2. Create a new config file based on an existing template: 12 | 13 | ```bash 14 | cp quickstart/configs/cloud-setup/template.json quickstart/configs/cloud-setup/hol.json 15 | ``` 16 | 17 | 3. Open the file `quickstart/configs/cloud-setup/hol.json` using your favorite editor and replace the following values: 18 | 19 | |Argument|Description|Example| 20 | |-----|-----------|-------| 21 | |<_orgName_>|Azure DevOps organization name where you will execute the Hands-On Lab|_MyOrg_| 22 | |<_projectName_>|Name of the existing project inside Azure DevOps where you will execute the Hands-On Lab|_MyDataOpsHOL_| 23 | |<_projectAlias_>|An unique string with less than 8 characteres that will be used as part of your resource group names|_dataops_| 24 | |<_subscriptionId_>|Azure Subscription ID where the resources will be deployed|_f7e5bb9e-0f98-4c5d-a5c1-a9154bf3cd61_| 25 | 26 | 3. You can also edit the subscriptions and service principals used by each one of the three environments: `dev`, `qa` and `prod`. 27 | 28 | ## Next Step 29 | 30 | * [Create the pre-required Azure resources](./2-create-prereqs-azure.md) -------------------------------------------------------------------------------- /quickstart/docs/4-delete-resources.md: -------------------------------------------------------------------------------- 1 | # After the Hands-On Lab 2 | 3 | 4 | 1. Execute the following script to delete all your Azure resources created on this Hands-On Lab. 
5 | 6 | ```powershell 7 | ./quickstart/scripts/cloud-setup/Delete-AzureResources.ps1 8 | ``` 9 | 10 | - You will be prompted to provide your `projectName` and `projectAlias`: 11 | 12 | 13 | |Argument|Description| 14 | |-----|-----------| 15 | |_projectName_|Name of the Azure DevOps project used for this Hands-On Lab| 16 | |_projectAlias_|A unique string with fewer than 8 characters that was used as part of your resource group names| 17 | 18 | 19 | 2. Open [Azure DevOps](https://dev.azure.com) and delete the following resources: 20 | 21 | - Azure DevOps Artifact Feed: 22 | - Delete the Artifact feed: `Artifacts` -> `lib-packages` -> `Feed Settings` -> `Delete Feed` 23 | - Purge the Artifact feed: 24 | Go to `Deleted Feeds` inside `Artifacts`. Select again `lib-packages` -> `Feed Settings` -> `Permanently Delete Feed` 25 | 26 | - Azure DevOps Project: 27 | - `Project Settings` -> `Overview` -> `Delete` -------------------------------------------------------------------------------- /quickstart/docs/images/ad-connect-directory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/ad-connect-directory.png -------------------------------------------------------------------------------- /quickstart/docs/images/artifact-feed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/artifact-feed.png -------------------------------------------------------------------------------- /quickstart/docs/images/azure-prereqs-script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/azure-prereqs-script.png -------------------------------------------------------------------------------- /quickstart/docs/images/create-artifact-feed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/create-artifact-feed.png -------------------------------------------------------------------------------- /quickstart/docs/images/install-git-tools.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/install-git-tools.png -------------------------------------------------------------------------------- /quickstart/docs/images/open-cloud-powershell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/open-cloud-powershell.png -------------------------------------------------------------------------------- /quickstart/docs/images/open-org-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/open-org-settings.png -------------------------------------------------------------------------------- /quickstart/docs/images/project-creation.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/project-creation.png -------------------------------------------------------------------------------- /quickstart/docs/images/quickstart-buildservice-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/quickstart-buildservice-1.png -------------------------------------------------------------------------------- /quickstart/docs/images/quickstart-buildservice-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/quickstart-buildservice-2.png -------------------------------------------------------------------------------- /quickstart/docs/images/quickstart-buildservice-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/quickstart-buildservice-3.png -------------------------------------------------------------------------------- /quickstart/docs/images/vm-lab-rdp-connection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/vm-lab-rdp-connection.png -------------------------------------------------------------------------------- /quickstart/docs/images/vm-lab-reset-password.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/devsquad-dataops/c75f355ba3021bd2c284ce641941436648ec72fe/quickstart/docs/images/vm-lab-reset-password.png -------------------------------------------------------------------------------- /quickstart/schemas/dataops/config.schema.1.0.0.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/schema#", 3 | "type": "object", 4 | "properties": { 5 | "Project": { 6 | "type": "object", 7 | "properties": { 8 | "Name": { 9 | "type": "string" 10 | }, 11 | "Alias": { 12 | "type": "string" 13 | } 14 | }, 15 | "required": [ 16 | "Name" 17 | ] 18 | }, 19 | "RepoConfiguration": { 20 | "type": "object", 21 | "properties": { 22 | "RepoName": { 23 | "type": "string" 24 | }, 25 | "AzureDevOpsOrganization": { 26 | "type": "string" 27 | }, 28 | "AzureDevOpsOrganizationURI": { 29 | "type": "string" 30 | }, 31 | "AzureDevOpsProject": { 32 | "type": "string" 33 | }, 34 | "TemplateGitUrl": { 35 | "type": "string" 36 | }, 37 | "MinimumApprovers": { 38 | "type": "string" 39 | }, 40 | "Pipelines": { 41 | "type": "array", 42 | "items": { 43 | "type": "object", 44 | "properties": { 45 | "Name": { 46 | "type": "string" 47 | }, 48 | "SourceYamlPath": { 49 | "type": "string" 50 | }, 51 | "BuildPolicy": { 52 | "type": "object", 53 | "properties": { 54 | "Name": { 55 | "type": "string" 56 | }, 57 | "PathFilter": { 58 | "type": "string" 59 | } 60 | }, 61 | "required": [ 62 | "Name", 63 | "PathFilter" 64 | ] 65 | } 66 | }, 67 | "required": [ 68 | "Name", 69 | "SourceYamlPath" 70 | ] 71 | } 72 | } 73 | }, 74 | "required": [ 
75 | "AzureDevOpsOrganizationURI", 76 | "AzureDevOpsProject", 77 | "MinimumApprovers", 78 | "Pipelines", 79 | "RepoName" 80 | ] 81 | } 82 | }, 83 | "required": [ 84 | "Project", 85 | "RepoConfiguration" 86 | ] 87 | } 88 | -------------------------------------------------------------------------------- /quickstart/scripts/cloud-setup/Delete-AzureResources.ps1: -------------------------------------------------------------------------------- 1 | param ( 2 | # Azure DevOps organization where you want to create this HOL resources 3 | [parameter(mandatory=$true)] 4 | [string]$projectName = '', 5 | 6 | # Simple alias for the project (less than 8 characters) 7 | [parameter(mandatory=$true)] 8 | [string]$projectAlias = '' 9 | ) 10 | 11 | $filter = ("rg-" + $projectAlias + "-") 12 | 13 | $myServicePrincipals = Get-AzADServicePrincipal -DisplayName ("SP-"+$projectName+"-DevTest") | Select-Object DisplayName 14 | Write-Host "`nService Principal`n-----------------" 15 | $myServicePrincipals | ForEach-Object { 16 | Write-Host $_.DisplayName 17 | } 18 | 19 | $myResourceGroups = Get-AzResourceGroup | Where-Object ResourceGroupName -match $filter | Select-Object ResourceGroupName 20 | Write-Host "`nResource Group`n-----------------" 21 | $myResourceGroups | ForEach-Object { 22 | Write-Host $_.ResourceGroupName 23 | } 24 | 25 | $answer = read-host -prompt "`nPress 'y' to delete all the resources listed above." 26 | $yesList = 'yes','y' 27 | 28 | if ($yesList -contains $answer.ToLower()) { 29 | if ($myServicePrincipals.Count -gt 0){ 30 | Get-AzADServicePrincipal -DisplayName ("SP-"+$projectName+"-DevTest") | ForEach-Object { Remove-AzADServicePrincipal -ApplicationId $_.ApplicationId -Force } 31 | Get-AzADApplication -DisplayName ("SP-"+$projectName+"-DevTest") | ForEach-Object { Remove-AzADApplication -ApplicationId $_.ApplicationId -Force } 32 | } 33 | if ($myResourceGroups.Count -gt 0){ 34 | Get-AzResourceGroup | Where-Object ResourceGroupName -match $filter | Remove-AzResourceGroup -AsJob -Force 35 | } 36 | } else { 37 | Write-Host "[Command Skipped] Your resources were not deleted." 38 | } -------------------------------------------------------------------------------- /quickstart/scripts/cloud-setup/Deploy-AzurePreReqs.ps1: -------------------------------------------------------------------------------- 1 | Using module ../modules/Azure.psm1 2 | Using module ../modules/RepoOperations.psm1 3 | Using module ../modules/Validation.psm1 4 | Using module ../modules/Logging.psm1 5 | 6 | [cmdletbinding()] 7 | param( 8 | [Parameter(Mandatory)] $ConfigurationFile 9 | ) 10 | 11 | Write-Host "Cloud setup starting..." 12 | 13 | BeginScope -Scope "Config file validation" 14 | 15 | [string]$schemaFilePath = "./quickstart/schemas/cloud-setup/config.schema.1.0.0.json" 16 | 17 | [bool]$validConfigFile = IsValidConfigurationFile -ConfigurationFile $ConfigurationFile -SchemaFile $schemaFilePath -Verbose:$VerbosePreference 18 | 19 | if (! $validConfigFile) 20 | { 21 | EndScope 22 | throw "Invalid properties on the '$ConfigurationFile' configuration file." 23 | exit 1 24 | } 25 | 26 | [hashtable]$config = LoadConfigurationFile -ConfigurationFile $ConfigurationFile -Verbose:$VerbosePreference 27 | [bool]$validConfigFileProperties = IsValidConfigurationFileProperties -Configuration $config -Verbose:$VerbosePreference 28 | 29 | if (! $validConfigFileProperties) 30 | { 31 | EndScope 32 | throw "The '$ConfigurationFile' config file has invalid properties." 
33 | exit 1 34 | } 35 | 36 | EndScope 37 | 38 | [hashtable]$servicePrincipals = SetupServicePrincipals -Configuration $config -Verbose:$VerbosePreference 39 | SetupEnvironments -Configuration $config -ServicePrincipals $servicePrincipals -Verbose:$VerbosePreference 40 | 41 | #Save this password inside output hol file 42 | $ServicePrincipalSecret = $ServicePrincipals[$config.servicePrincipals[0]].clientSecret 43 | 44 | PublishOutputs -Configuration $config -ServicePrincipalSecret $ServicePrincipalSecret -Verbose:$VerbosePreference 45 | 46 | Write-Host "Done!" 47 | -------------------------------------------------------------------------------- /quickstart/scripts/cloud-setup/Replace-TemplateArgs.ps1: -------------------------------------------------------------------------------- 1 | param ( 2 | # Azure DevOps organization where you want to create this HOL resources 3 | [parameter(mandatory=$true)] 4 | [string]$orgName = '', 5 | 6 | # Azure DevOps organization where you want to create this HOL resources 7 | [parameter(mandatory=$true)] 8 | [string]$projectName = '', 9 | 10 | # ID of the Azure Subscription where you want to create this HOL resources 11 | [parameter(mandatory=$true)] 12 | [string]$subscriptionId = '', 13 | 14 | # ID of the Azure Subscription where you want to create this HOL resources 15 | [string]$configsTemplate = 'quickstart/configs/cloud-setup/template.json', 16 | 17 | # ID of the Azure Subscription where you want to create this HOL resources 18 | [string]$configsOutput = 'quickstart/configs/cloud-setup/hol.json' 19 | ) 20 | 21 | $randomLetter = (65..90) + (97..122) | Get-Random -Count 1 | % {[char]$_} 22 | $gUUID = New-Guid 23 | $projectAlias = $randomLetter + $gUUID.Guid.Split("-")[0].Substring(0, 7) 24 | Write-Output "Project alias generated: " $projectAlias.ToLower() 25 | 26 | (Get-Content $configsTemplate) ` 27 | -replace '', $projectName ` 28 | -replace '', $projectAlias.ToLower() ` 29 | -replace '', $orgName ` 30 | -replace '', $subscriptionId | 31 | Out-File $configsOutput 32 | -------------------------------------------------------------------------------- /quickstart/scripts/cloud-setup/Validate-AzurePreReqs.ps1: -------------------------------------------------------------------------------- 1 | Using module ../modules/Validation.psm1 2 | 3 | [cmdletbinding()] 4 | param( 5 | [Parameter(Mandatory)] $ConfigurationsDirectory 6 | ) 7 | 8 | [string]$schemaFilePath = "./quickstart/schemas/cloud-setup/config.schema.1.0.0.json" 9 | 10 | ValidateConfigurationsDirectory -ConfigurationsDirectory $ConfigurationsDirectory -SchemaFile $schemaFilePath -Verbose:$VerbosePreference 11 | -------------------------------------------------------------------------------- /quickstart/scripts/dataops/Deploy-AzureDevOps.ps1: -------------------------------------------------------------------------------- 1 | Using module ../modules/AzureDevOps.psm1 2 | Using module ../modules/RepoOperations.psm1 3 | Using module ../modules/Validation.psm1 4 | 5 | [cmdletbinding()] 6 | param( 7 | [Parameter(Mandatory)] $ConfigurationFile, 8 | [boolean] $UseSSH = $false, 9 | [boolean] $UsePAT = $false 10 | ) 11 | 12 | $schemaFilePath = "./quickstart/schemas/dataops/config.schema.1.0.0.json" 13 | 14 | $validConfigFile = IsValidConfigurationFile -ConfigurationFile $ConfigurationFile -SchemaFile $schemaFilePath -Verbose:$VerbosePreference 15 | 16 | if (! $validConfigFile) 17 | { 18 | throw "Invalid properties on the '$ConfigurationFile' configuration file." 
19 | } 20 | 21 | $config = LoadConfigurationFile -ConfigurationFile $ConfigurationFile -Verbose:$VerbosePreference 22 | 23 | BeginScope -Scope "Enviornments" 24 | 25 | $environment = 'dev','qa','prod','databricks-dev','databricks-qa','databricks-prod' 26 | 27 | foreach ($env in $environment) 28 | { 29 | CreateAzDevOpsRepoEnviorment -Environment $env -RepoConfiguration $config.RepoConfiguration -Verbose:$VerbosePreference 30 | } 31 | 32 | EndScope 33 | 34 | $branches = 'develop','qa','main' 35 | 36 | try { 37 | 38 | CreateAzDevOpsVariableGroups -RepoConfiguration $config.RepoConfiguration -Verbose:$VerbosePreference 39 | 40 | $repoInfo = CreateAzureDevopsRepository -RepoConfiguration $config.RepoConfiguration -Verbose:$VerbosePreference 41 | 42 | $directory = CloneRepo -RepoInfo $repoInfo -UseSSH $UseSSH -UsePAT $UsePAT -Verbose:$VerbosePreference 43 | ImportTemplateRepoToDomainRepo -Branches $branches -RepoConfiguration $config.RepoConfiguration -Directory $directory[0] -Verbose:$VerbosePreference 44 | 45 | CreateAzDevOpsYamlPipelines -DefaultBranch $branches[0] -RepoConfiguration $config.RepoConfiguration -Verbose:$VerbosePreference 46 | 47 | UpdateIaCParameters -Branch $branches[0] -Configuration $config -Directory $directory[0] -Verbose:$VerbosePreference 48 | 49 | } 50 | catch { 51 | throw "Couldn't access, create or clone the repository" 52 | } 53 | 54 | foreach ($branch in $branches) 55 | { 56 | CreateAzDevOpsRepoApprovalPolicy -Branch $branch -RepoInfo $repoInfo -RepoConfiguration $config.RepoConfiguration -Verbose:$VerbosePreference 57 | CreateAzDevOpsRepoCommentPolicy -Branch $branch -RepoInfo $repoInfo -RepoConfiguration $config.RepoConfiguration -Verbose:$VerbosePreference 58 | CreateAzDevOpsRepoBuildPolicy -Branch $branch -RepoInfo $repoInfo -RepoConfiguration $config.RepoConfiguration -Verbose:$VerbosePreference 59 | } 60 | 61 | -------------------------------------------------------------------------------- /quickstart/scripts/dataops/Validate-AzureDevOps.ps1: -------------------------------------------------------------------------------- 1 | Using module ../modules/Validation.psm1 2 | 3 | [cmdletbinding()] 4 | param( 5 | [Parameter()] $ConfigurationsDirectory 6 | ) 7 | 8 | [string]$schemaFilePath = "./schemas/dataops/config.schema.1.0.0.json" 9 | [string]$ConfigurationsDirectory = "./configs/dataops/" 10 | 11 | ValidateConfigurationsDirectory -ConfigurationsDirectory $ConfigurationsDirectory -SchemaFile $schemaFilePath -Verbose:$VerbosePreference -------------------------------------------------------------------------------- /quickstart/scripts/labvm/Deploy-LabVM.ps1: -------------------------------------------------------------------------------- 1 | param ( 2 | [string]$sourceVhd = 'https://stlabvm.blob.core.windows.net/vhd/labvm-001.vhd', 3 | 4 | # Size of the VHD snapshot in bytes 5 | [long]$vhdSizeBytes = 136367309312, 6 | 7 | # Region to deploy the VM 8 | [string]$location = 'eastus', 9 | 10 | # VM resource group 11 | [string]$resourceGroupName = 'rg-labvm', 12 | 13 | # disk VM name 14 | [string]$diskName = 'disklabvmeastus' 15 | ) 16 | 17 | Write-Host "[DevSquad In a Day] Initializing the lab VM disk" 18 | 19 | $diskconfig = New-AzDiskConfig -SkuName 'Premium_LRS' -OsType 'Windows' -UploadSizeInBytes $vhdSizeBytes -Location $location -CreateOption 'Upload' 20 | 21 | New-AzResourceGroup $resourceGroupName -Location $location 22 | 23 | New-AzDisk -ResourceGroupName $resourceGroupName -DiskName $diskName -Disk $diskconfig 24 | 25 | $diskSas = Grant-AzDiskAccess 
-ResourceGroupName $resourceGroupName -DiskName $diskName -DurationInSecond 86400 -Access 'Write' 26 | 27 | $disk = Get-AzDisk -ResourceGroupName $resourceGroupName -DiskName $diskName 28 | 29 | Write-Host "[DevSquad In a Day] Copying the lab VM snapshot to your subscription" 30 | 31 | azcopy copy $sourceVhd $diskSas.AccessSAS --blob-type PageBlob 32 | 33 | Revoke-AzDiskAccess -ResourceGroupName $resourceGroupName -DiskName $diskName 34 | 35 | Write-Host "[DevSquad In a Day] Creating the lab VM associated resources" 36 | 37 | $subnetName = 'labDsiadSubNet' 38 | $singleSubnet = New-AzVirtualNetworkSubnetConfig ` 39 | -Name $subnetName ` 40 | -AddressPrefix 10.0.0.0/24 41 | 42 | $vnetName = "labDsiadVnet" 43 | $vnet = New-AzVirtualNetwork ` 44 | -Name $vnetName -ResourceGroupName $resourceGroupName ` 45 | -Location $location ` 46 | -AddressPrefix 10.0.0.0/16 ` 47 | -Subnet $singleSubnet 48 | 49 | 50 | $nsgName = "labDsiadNsg" 51 | $rdpRule = New-AzNetworkSecurityRuleConfig -Name myRdpRule -Description "Allow RDP" ` 52 | -Access Allow -Protocol Tcp -Direction Inbound -Priority 110 ` 53 | -SourceAddressPrefix Internet -SourcePortRange * ` 54 | -DestinationAddressPrefix * -DestinationPortRange 3389 55 | 56 | $nsg = New-AzNetworkSecurityGroup ` 57 | -ResourceGroupName $resourceGroupName ` 58 | -Location $location ` 59 | -Name $nsgName -SecurityRules $rdpRule 60 | 61 | $ipName = "labDsiadIP" 62 | $pip = New-AzPublicIpAddress ` 63 | -Name $ipName -ResourceGroupName $resourceGroupName ` 64 | -Location $location ` 65 | -AllocationMethod Dynamic 66 | 67 | $nicName = "labDsiadNicName" 68 | $nic = New-AzNetworkInterface -Name $nicName ` 69 | -ResourceGroupName $resourceGroupName ` 70 | -Location $location -SubnetId $vnet.Subnets[0].Id ` 71 | -PublicIpAddressId $pip.Id ` 72 | -NetworkSecurityGroupId $nsg.Id 73 | 74 | $vmName = "vm-lab" 75 | $vmConfig = New-AzVMConfig -VMName $vmName -VMSize "Standard_E2s_v3" 76 | 77 | $vm = Add-AzVMNetworkInterface -VM $vmConfig -Id $nic.Id 78 | 79 | $vm = Set-AzVMOSDisk -VM $vm -ManagedDiskId $disk.Id ` 80 | -CreateOption Attach -Windows 81 | 82 | $vm = Set-AzVMBootDiagnostic -VM $vm -Disable 83 | 84 | Write-Host "[DevSquad In a Day] Creating the lab VM" 85 | 86 | New-AzVM -ResourceGroupName $resourceGroupName -Location $location -VM $vm 87 | 88 | Write-Host "[DevSquad In a Day] Done!" 89 | -------------------------------------------------------------------------------- /quickstart/scripts/modules/Common.psm1: -------------------------------------------------------------------------------- 1 | class Argument { 2 | 3 | static [void] AssertIsNotNullOrEmpty([string] $paramName, [string] $paramValue) { 4 | if ([string]::IsNullOrWhiteSpace($paramValue)) { 5 | throw "Parameter $paramName is mandatory!" 6 | } 7 | } 8 | 9 | static [void] AssertIsNotNull([string] $paramName, [string] $paramValue) { 10 | if (-Not $paramValue) { 11 | throw "Parameter $paramName is null!" 
12 | } 13 | } 14 | 15 | static [void] AssertIsMatch([string] $paramName, [string] $paramValue, [string] $regexPattern) { 16 | if (-Not ($paramValue -match $regexPattern)) { 17 | throw "Parameter $paramName value '$paramValue' does not match $regexPattern" 18 | } 19 | } 20 | } 21 | 22 | function LoadConfigurationFile { 23 | [cmdletbinding()] 24 | [OutputType([hashtable])] 25 | param( 26 | [Parameter(Mandatory)] [string] $ConfigurationFile 27 | ) 28 | [Argument]::AssertIsNotNullOrEmpty("ConfigurationFile", $ConfigurationFile) 29 | 30 | if (-Not (Test-Path $ConfigurationFile)) { 31 | throw "Configuration File: $ConfigurationFile does not exists!" 32 | } 33 | 34 | return (Get-Content -Path $ConfigurationFile | ConvertFrom-Json -AsHashtable) 35 | } 36 | -------------------------------------------------------------------------------- /quickstart/scripts/modules/Logging.psm1: -------------------------------------------------------------------------------- 1 | function BeginScope 2 | { 3 | param ( 4 | [Parameter(Mandatory)] [string] $Scope 5 | ) 6 | 7 | Write-Host "##[group]$Scope" 8 | } 9 | 10 | function EndScope 11 | { 12 | Write-Host "##[endgroup]" 13 | } 14 | 15 | function LogInfo 16 | { 17 | param ( 18 | [Parameter(Mandatory)] [string] $Message 19 | ) 20 | 21 | Write-Host "##[command]$Message" 22 | } 23 | 24 | function LogWarning 25 | { 26 | param ( 27 | [Parameter(Mandatory)] [string] $Message 28 | ) 29 | 30 | Write-Host "##[warning]$Message" 31 | } 32 | 33 | function LogOk 34 | { 35 | param ( 36 | [Parameter(Mandatory)] [string] $Message 37 | ) 38 | 39 | Write-Host "##[section]$Message" 40 | } 41 | 42 | function LogError 43 | { 44 | param ( 45 | [Parameter(Mandatory)] [string] $Message 46 | ) 47 | 48 | Write-Host "##[error]$Message" 49 | } 50 | --------------------------------------------------------------------------------
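As a closing usage note for the Logging module above (a sketch, not code from the repo): its functions wrap Azure DevOps log-formatting commands, so the quickstart scripts can group related output into collapsible sections of the pipeline log. Assuming it is imported from its path under `quickstart/scripts/modules`, a caller might use it like this:

```powershell
Import-Module ./quickstart/scripts/modules/Logging.psm1

BeginScope -Scope "Config file validation"   # opens a collapsible ##[group] section in the pipeline log
LogInfo    -Message "Loading configuration file..."
LogWarning -Message "Optional property missing; falling back to defaults."
LogOk      -Message "Configuration is valid."
EndScope                                     # emits ##[endgroup] to close the section
```

Outside an Azure DevOps agent the `##[...]` prefixes are printed as literal text, so the grouping and coloring only appear in pipeline runs.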