├── .devcontainer
│   ├── Dockerfile
│   └── devcontainer.json
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── config.yml
│   │   ├── repository-issue.md
│   │   └── solution-accelerator-request.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows
│       └── codeql.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── SECURITY.md
├── SUPPORT.md
├── classical
│   ├── README.md
│   ├── aml-cli-v2
│   │   ├── data-science
│   │   │   ├── environment
│   │   │   │   └── train-conda.yml
│   │   │   └── src
│   │   │       ├── evaluate.py
│   │   │       ├── prep.py
│   │   │       ├── register.py
│   │   │       └── train.py
│   │   ├── data
│   │   │   ├── taxi-batch.csv
│   │   │   ├── taxi-data.csv
│   │   │   └── taxi-request.json
│   │   └── mlops
│   │       ├── azureml
│   │       │   ├── deploy
│   │       │   │   ├── batch
│   │       │   │   │   ├── batch-deployment.yml
│   │       │   │   │   └── batch-endpoint.yml
│   │       │   │   └── online
│   │       │   │       ├── online-deployment.yml
│   │       │   │       ├── online-endpoint.yml
│   │       │   │       └── score.py
│   │       │   └── train
│   │       │       ├── data.yml
│   │       │       ├── pipeline.yml
│   │       │       └── train-env.yml
│   │       ├── devops-pipelines
│   │       │   ├── deploy-batch-endpoint-pipeline.yml
│   │       │   ├── deploy-model-training-pipeline.yml
│   │       │   └── deploy-online-endpoint-pipeline.yml
│   │       └── github-actions
│   │           ├── deploy-batch-endpoint-pipeline-classical.yml
│   │           ├── deploy-model-training-pipeline-classical.yml
│   │           └── deploy-online-endpoint-pipeline-classical.yml
│   ├── python-sdk-v1
│   │   ├── config-aml.yml
│   │   ├── data-science
│   │   │   ├── environment
│   │   │   │   ├── batch.yml
│   │   │   │   ├── batch_monitor.yml
│   │   │   │   ├── train.yml
│   │   │   │   └── train_monitor.yml
│   │   │   ├── notebooks
│   │   │   │   └── experiment1.ipynb
│   │   │   ├── src
│   │   │   │   ├── evaluate.py
│   │   │   │   ├── prep.py
│   │   │   │   ├── score.py
│   │   │   │   └── train.py
│   │   │   └── tests
│   │   │       └── test.py
│   │   ├── data
│   │   │   ├── scoring
│   │   │   │   └── credit_batch.csv
│   │   │   └── training
│   │   │       └── credit.csv
│   │   └── mlops
│   │       └── devops-pipelines
│   │           ├── deploy-drift-detection.yml
│   │           ├── deploy-model-batch-scoring.yml
│   │           └── deploy-model-training-pipeline.yml
│   ├── python-sdk-v2
│   │   ├── data-science
│   │   │   ├── environment
│   │   │   │   └── train-conda.yml
│   │   │   └── src
│   │   │       ├── evaluate
│   │   │       │   └── evaluate.py
│   │   │       ├── prep
│   │   │       │   └── prep.py
│   │   │       ├── register
│   │   │       │   └── register.py
│   │   │       └── train
│   │   │           └── train.py
│   │   ├── data
│   │   │   ├── taxi-batch.csv
│   │   │   ├── taxi-data.csv
│   │   │   └── taxi-request.json
│   │   └── mlops
│   │       ├── azureml
│   │       │   └── train
│   │       │       └── run_pipeline.py
│   │       └── devops-pipelines
│   │           ├── deploy-batch-endpoint-pipeline.yml
│   │           ├── deploy-model-training-pipeline.yml
│   │           └── deploy-online-endpoint-pipeline.yml
│   └── rai-aml-cli-v2
│       ├── data-science
│       │   ├── environment
│       │   │   ├── train-conda.yml
│       │   │   └── train-requirements.txt
│       │   ├── experiment
│       │   │   ├── evaluate.ipynb
│       │   │   ├── prep.ipynb
│       │   │   ├── register.ipynb
│       │   │   ├── requirements.txt
│       │   │   └── train.ipynb
│       │   └── src
│       │       ├── evaluate
│       │       │   ├── evaluate.py
│       │       │   └── test_evaluate.py
│       │       ├── prep
│       │       │   ├── prep.py
│       │       │   └── test_prep.py
│       │       ├── register
│       │       │   └── register.py
│       │       └── train
│       │           ├── test_train.py
│       │           └── train.py
│       ├── data
│       │   ├── taxi-batch.csv
│       │   ├── taxi-data.csv
│       │   └── taxi-request.json
│       └── mlops
│           ├── azureml
│           │   ├── deploy
│           │   │   ├── batch
│           │   │   │   ├── batch-deployment.yml
│           │   │   │   └── batch-endpoint.yml
│           │   │   └── online
│           │   │       ├── online-deployment.yml
│           │   │       ├── online-endpoint.yml
│           │   │       └── score.py
│           │   └── train
│           │       ├── pipeline.yml
│           │       └── train-env.yml
│           └── devops-pipelines
│               ├── deploy-batch-endpoint-pipeline.yml
│               ├── deploy-model-training-pipeline.yml
│               ├── deploy-online-endpoint-pipeline.yml
│               ├── register-rai-components.yml
│               └── trigger_.code-search
├── config-infra-dev.yml
├── config-infra-prod.yml
├── cv
│   ├── README.md
│   ├── aml-cli-v2
│   │   ├── data-science
│   │   │   ├── environment
│   │   │   │   ├── Dockerfile
│   │   │   │   ├── ndv4-topo.xml
│   │   │   │   └── requirements.txt
│   │   │   ├── requirements-tests.txt
│   │   │   ├── src
│   │   │   │   ├── image_io.py
│   │   │   │   ├── model
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── model_loader.py
│   │   │   │   │   ├── swin_models.py
│   │   │   │   │   ├── test_model.py
│   │   │   │   │   └── torchvision_models.py
│   │   │   │   ├── profiling.py
│   │   │   │   └── train.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       ├── model
│   │   │       │   └── test_model_loader.py
│   │   │       └── test_train.py
│   │   ├── data
│   │   │   └── sample-request.json
│   │   └── mlops
│   │       ├── azureml
│   │       │   ├── deploy
│   │       │   │   └── online
│   │       │   │       ├── online-deployment.yml
│   │       │   │       └── online-endpoint.yml
│   │       │   └── train
│   │       │       ├── create_stanford_dogs_dataset.yaml
│   │       │       ├── pipeline.yaml
│   │       │       ├── train-env.yaml
│   │       │       └── train.yaml
│   │       ├── devops-pipelines
│   │       │   ├── deploy-model-training-pipeline.yml
│   │       │   └── deploy-online-endpoint-pipeline.yml
│   │       └── github-actions
│   │           ├── deploy-model-training-pipeline.yml
│   │           └── deploy-online-endpoint-pipeline.yml
│   └── python-sdk-v1
│       ├── config-aml.yml
│       ├── data-science
│       │   ├── environment
│       │   │   └── training
│       │   │       ├── azureml_environment.json
│       │   │       └── conda_dependencies.yml
│       │   ├── notebooks
│       │   │   └── .gitkeep
│       │   ├── src
│       │   │   ├── evaluate.py
│       │   │   ├── model
│       │   │   │   ├── __init__.py
│       │   │   │   ├── dataset.py
│       │   │   │   └── net.py
│       │   │   ├── prep.py
│       │   │   └── train.py
│       │   └── tests
│       │       └── .gitkeep
│       ├── data
│       │   └── training
│       │       └── image_labels.csv
│       └── mlops
│           └── devops-pipelines
│               └── deploy-model-training-pipeline.yml
├── environment.yml
├── infrastructure
│   ├── bicep
│   │   ├── bicepconfig.json
│   │   ├── main.bicep
│   │   ├── main.json
│   │   ├── modules
│   │   │   ├── aml_computecluster.bicep
│   │   │   ├── aml_workspace.bicep
│   │   │   ├── application_insights.bicep
│   │   │   ├── container_registry.bicep
│   │   │   ├── key_vault.bicep
│   │   │   └── storage_account.bicep
│   │   └── pipelines
│   │       └── bicep-ado-deploy-infra.yml
│   └── terraform
│       ├── aml_deploy.tf
│       ├── devops-pipelines
│       │   └── tf-ado-deploy-infra.yml
│       ├── github-actions
│       │   └── tf-gha-deploy-infra.yml
│       ├── locals.tf
│       ├── main.tf
│       ├── modules
│       │   ├── aml-workspace
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── application-insights
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── container-registry
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── data-explorer
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── key-vault
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── resource-group
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   └── storage-account
│       │       ├── main.tf
│       │       ├── outputs.tf
│       │       └── variables.tf
│       └── variables.tf
├── nlp
│   ├── README.md
│   ├── aml-cli-v2
│   │   ├── data-science
│   │   │   ├── environments
│   │   │   │   ├── inference
│   │   │   │   │   └── conda_env.yml
│   │   │   │   └── training
│   │   │   │       ├── Dockerfile
│   │   │   │       └── requirements.txt
│   │   │   └── src
│   │   │       └── summarization
│   │   │           ├── compare.py
│   │   │           ├── prepare.py
│   │   │           ├── register.py
│   │   │           ├── run.py
│   │   │           └── score.py
│   │   ├── data
│   │   │   └── nlp-summarization-request.json
│   │   └── mlops
│   │       ├── azureml
│   │       │   ├── deploy
│   │       │   │   └── online
│   │       │   │       ├── online-deployment.yml
│   │       │   │       └── online-endpoint.yml
│   │       │   └── train
│   │       │       ├── pipeline.yml
│   │       │       └── train-env.yml
│   │       ├── devops-pipelines
│   │       │   ├── deploy-model-training-pipeline.yml
│   │       │   └── deploy-online-endpoint-pipeline.yml
│   │       └── github-actions
│   │           ├── deploy-model-training-pipeline.yml
│   │           └── deploy-online-endpoint-pipeline.yml
│   └── python-sdk-v2
│       ├── data-science
│       │   ├── environments
│       │   │   ├── inference
│       │   │   │   └── conda_env.yml
│       │   │   └── training
│       │   │       ├── Dockerfile
│       │   │       └── requirements.txt
│       │   └── src
│       │       └── summarization
│       │           ├── compare.py
│       │           ├── prepare.py
│       │           ├── register.py
│       │           ├── run.py
│       │           └── score.py
│       ├── data
│       │   └── nlp-summarization-request.json
│       └── mlops
│           ├── azureml
│           │   └── train
│           │       └── pipeline-train.py
│           └── devops-pipelines
│               └── deploy-model-training-pipeline.yml
└── requirements.txt

/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.209.6/containers/python-3-miniconda/.devcontainer/base.Dockerfile
FROM mcr.microsoft.com/vscode/devcontainers/miniconda:0.202.1-3

# Update the conda environment according to the environment.yml file in the project.
COPY environment.yml /tmp/conda-tmp/
RUN /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml && rm -rf /tmp/conda-tmp
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.222.0/containers/python-3-miniconda
{
    "name": "Miniconda (Python 3)",
    "build": {
        "context": "..",
        "dockerfile": "Dockerfile",
    },
    // Set *default* container specific settings.json values on container create.
    "settings": {
        "python.defaultInterpreterPath": "/opt/conda/bin/python",
    },
    // Add the IDs of extensions you want installed when the container is created.
    "extensions": [
        "ms-python.python",
        "ms-python.vscode-pylance",
        "ms-toolsai.vscode-ai",
    ],
    // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
    "remoteUser": "vscode",
    "features": {
        "azure-cli": "latest"
    },
    "onCreateCommand": "az extension add -n ml -y"
}
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
blank_issues_enabled: false
contact_links:
  - name: MLOps v2 solution accelerators discussions.
    url: https://github.com/azure/mlops-v2/discussions
    about: >-
      Please ask questions and start open-ended discussions here.
      Use issues for well-defined work in the solution accelerator repositories.
  - name: Azure ML CLI issues.
    url: https://github.com/azure/azure-cli-extensions/issues/new/choose
    about: Please open issues with the Azure ML CLI extension here.
  - name: Azure ML Python SDK issues.
    url: https://github.com/azure/azure-sdk-for-python/issues/new/choose
    about: Please open issues with the Azure ML Python SDK here.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/repository-issue.md:
--------------------------------------------------------------------------------
---
name: Suggest an enhancement for this repository.
about: Have an idea for improvements to this repository?
title: '[repo] '
labels: ''
assignees: ''
---

## Why?

<!-- What problem is this solving? -->

## How?

<!-- How are you suggesting it gets solved? -->

## Anything else?

<!--
Links? References? Anything that will give us more context about the issue that you are encountering!
21 | --> 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/solution-accelerator-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Request or suggest a new solution accelerator. 3 | about: Have an idea for a new solution accelerator? 4 | title: '[new accelerator] <title>' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | ## Why doesn't an existing solution accelerator work? 10 | 11 | <!-- Concisely explain why a new solution accelerator is needed. --> 12 | 13 | ## What work is needed? 14 | 15 | <!-- 16 | Concisely explain the infrastructure and MLOps work needed. 17 | Include as much detail as possible in how this would fit into the 18 | overall solution accelerator. 19 | --> 20 | 21 | ## Anything else? 22 | 23 | <!-- 24 | Links? References? Anything that will give us more context about the issue that you are encountering! 25 | --> 26 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # PR into Azure/mlops-project-template 2 | 3 | ## Checklist 4 | 5 | I have: 6 | 7 | - [ ] read and followed the contributing guidelines 8 | 9 | ## Changes 10 | 11 | - 12 | 13 | fixes # 14 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ "main", main*, feature* ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ "main" ] 9 | schedule: 10 | - cron: '0 3 * * 3' 11 | 12 | jobs: 13 | analyze: 14 | name: Analyze 15 | runs-on: ubuntu-latest 16 | permissions: 17 | actions: read 18 | contents: read 19 | security-events: write 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | language: [ 'python' ] 25 | 26 | steps: 27 | - name: Checkout repository 28 | uses: actions/checkout@v3 29 | 30 | # Initializes the CodeQL tools for scanning. 31 | - name: Initialize CodeQL 32 | uses: github/codeql-action/init@v2 33 | with: 34 | languages: ${{ matrix.language }} 35 | queries: security-and-quality 36 | 37 | - name: Perform CodeQL Analysis 38 | uses: github/codeql-action/analyze@v2 39 | with: 40 | category: "/language:${{matrix.language}}" 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Mac stuff 7 | .DS_Store 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # Terraform 133 | .terraform.lock.hcl 134 | terraform.tfstate 135 | terraform.tfstate.backup 136 | .terraform.tfstate.lock.info 137 | .terraform 138 | terraform.tfvars 139 | 140 | /infrastructure/bicep/main.json 141 | ! /infrastructure/bicep/bicepconfig.json 142 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.2.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | 9 | # Opinionated code formatter to forget about formatting 10 | - repo: https://github.com/psf/black 11 | rev: 21.12b0 12 | hooks: 13 | - id: black 14 | additional_dependencies: ['click==8.0.4'] 15 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Azure MLOps (v2) solution accelerator 2 | 3 | [Main README file](https://github.com/Azure/mlops-v2/blob/main/README.md) 4 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | <!-- BEGIN MICROSOFT SECURITY.MD V0.0.5 BLOCK --> 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 
16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | <!-- END MICROSOFT SECURITY.MD BLOCK --> 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 
--------------------------------------------------------------------------------
/classical/README.md:
--------------------------------------------------------------------------------
![Header](https://github.com/Azure/mlops-v2/blob/main/documentation/repositoryfiles/mlopsheader.jpg)

# Azure MLOps (v2) Pattern: Azure Machine Learning - Classical Machine Learning

This repository includes all of the use-case-specific code to be deployed as the inner loop of the [MLOps v2](https://github.com/Azure/mlops-v2) solution accelerator.

The repo itself functions as a standalone entity that agnostically holds all Azure Machine Learning - Classical Machine Learning requirements for this architectural pattern.


## 📐 Pattern Architectures: Key concepts

This repository follows the architecture linked below:

| Link | AI Pattern |
| ------------------------------------------------------- | ----------------------------------------------------------------------- |
| [Pattern AML CML](https://github.com/Azure/mlops-v2/blob/main/documentation/architecturepattern/AzureML_CML_Architecture.png) | Azure Machine Learning - Classical Machine Learning |


## 👤 Getting started

Please visit [MLOps v2](https://github.com/Azure/mlops-v2) for the initial deployment of this inner loop pattern.


## ‼️ Feedback or Issues

Please visit [MLOps v2](https://github.com/Azure/mlops-v2) and file an **issue**, or go to Microsoft's internal SharePoint site to submit any feedback.


## Contributing

This project welcomes contributions and suggestions. To learn more, visit the contributing section in the [MLOps v2](https://github.com/Azure/mlops-v2) solution accelerator.

Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.

When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information, see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.


## Trademarks

This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
trademarks or logos is subject to and must follow
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/legal/intellectualproperty/trademarks/usage/general).
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
Any use of third-party trademarks or logos is subject to those third parties' policies.
--------------------------------------------------------------------------------
/classical/aml-cli-v2/data-science/environment/train-conda.yml:
--------------------------------------------------------------------------------
channels:
  - defaults
  - anaconda
  - conda-forge
dependencies:
  - python=3.7.5
  - pip
  - pip:
    - azureml-mlflow==1.38.0
    - azure-ai-ml==1.0.0
    - pyarrow==10.0.0
    - scikit-learn==0.24.1
    - pandas==1.2.1
    - joblib==1.0.0
    - matplotlib==3.3.3
    - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
    - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
--------------------------------------------------------------------------------
/classical/aml-cli-v2/data-science/src/register.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Registers trained ML model if deploy flag is True.
"""

import argparse
from pathlib import Path
import pickle
import mlflow

import os
import json

def parse_args():
    '''Parse input arguments'''

    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name', type=str, help='Name under which model will be registered')
    parser.add_argument('--model_path', type=str, help='Model directory')
    parser.add_argument('--evaluation_output', type=str, help='Path of eval results')
    parser.add_argument(
        "--model_info_output_path", type=str, help="Path to write model info JSON"
    )
    args, _ = parser.parse_known_args()
    print(f'Arguments: {args}')

    return args


def main(args):
    '''Loads model, registers it if deploy flag is True'''

    # read the deploy flag produced by the evaluation step
    with open((Path(args.evaluation_output) / "deploy_flag"), 'rb') as infile:
        deploy_flag = int(infile.read())

    mlflow.log_metric("deploy flag", int(deploy_flag))

    if deploy_flag == 1:

        print("Registering ", args.model_name)

        # load model
        model = mlflow.sklearn.load_model(args.model_path)

        # log model using mlflow
        mlflow.sklearn.log_model(model, args.model_name)

        # register logged model using mlflow
        run_id = mlflow.active_run().info.run_id
        model_uri = f'runs:/{run_id}/{args.model_name}'
        mlflow_model = mlflow.register_model(model_uri, args.model_name)
        model_version = mlflow_model.version

        # write model info
        print("Writing JSON")
        model_info = {"id": "{0}:{1}".format(args.model_name, model_version)}
        output_path = os.path.join(args.model_info_output_path, "model_info.json")
        with open(output_path, "w") as of:
            json.dump(model_info, fp=of)

    else:
        print("Model will not be registered!")

if __name__ == "__main__":

    mlflow.start_run()

    # ---------- Parse Arguments ----------- #
    # -------------------------------------- #

    args = parse_args()

    lines = [
        f"Model name: {args.model_name}",
        f"Model path: {args.model_path}",
        f"Evaluation output path: {args.evaluation_output}",
    ]

    for line in lines:
        print(line)

    main(args)

    mlflow.end_run()
--------------------------------------------------------------------------------
/classical/aml-cli-v2/data/taxi-request.json:
-------------------------------------------------------------------------------- 1 | {"input_data": [[2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57], 2 | [3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35]] 3 | } -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/azureml/deploy/batch/batch-deployment.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json 2 | name: batch-dp 3 | endpoint_name: taxi-fare-batch 4 | model: azureml:taxi-model@latest 5 | compute: azureml:batch-cluster 6 | resources: 7 | instance_count: 1 8 | max_concurrency_per_instance: 2 9 | mini_batch_size: 10 10 | output_action: append_row 11 | output_file_name: predictions.csv 12 | retry_settings: 13 | max_retries: 3 14 | timeout: 30 15 | error_threshold: -1 16 | logging_level: info -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/azureml/deploy/batch/batch-endpoint.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/batchEndpoint.schema.json 2 | name: taxi-fare-batch 3 | description: taxi cost batch endpoint 4 | auth_mode: aad_token -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/azureml/deploy/online/online-deployment.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json 2 | name: blue 3 | endpoint_name: taxi-fare-online 4 | model: azureml:taxi-model@latest 5 | instance_type: Standard_DS3_v2 6 | instance_count: 1 7 | -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/azureml/deploy/online/online-endpoint.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json 2 | name: taxi-fare-online 3 | description: taxi cost online endpoint 4 | auth_mode: key -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/azureml/deploy/online/score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/classical/aml-cli-v2/mlops/azureml/deploy/online/score.py -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/azureml/train/data.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/data.schema.json 2 | 3 | # Supported paths include: 4 | # local: ./<path> 5 | # blob: https://<account_name>.blob.core.windows.net/<container_name>/<path> 6 | # ADLS gen2: abfss://<file_system>@<account_name>.dfs.core.windows.net/<path>/ 7 | # Datastore: azureml://datastores/<data_store_name>/paths/<path> 8 | type: uri_file 9 | name: taxi-data 10 | description: taxi dataset 11 | path: ../../../data/taxi-data.csv -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/azureml/train/pipeline.yml: 
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
experiment_name: taxi-fare-training
description: Training Pipeline to train a model that predicts taxi fare price

# <inputs_and_outputs>
inputs:
  input:
    type: uri_file
    path: azureml:taxi-data@latest
  enable_monitoring: 'false'
  table_name: 'taximonitoring'

outputs:
  train_data:
  val_data:
  test_data:
  trained_model:
  evaluation_output:
  model_info_output_path:
# </inputs_and_outputs>

# <jobs>
settings:
  default_datastore: azureml:workspaceblobstore
  default_compute: azureml:cpu-cluster
  continue_on_step_failure: false

jobs:
  prep_data:
    name: prep_data
    display_name: prep-data
    code: ../../../data-science/src
    command: >-
      python prep.py
      --raw_data ${{inputs.raw_data}}
      --train_data ${{outputs.train_data}}
      --val_data ${{outputs.val_data}}
      --test_data ${{outputs.test_data}}
      --enable_monitoring ${{inputs.enable_monitoring}}
      --table_name ${{inputs.table_name}}
    environment: azureml:taxi-train-env@latest
    inputs:
      raw_data: ${{parent.inputs.input}}
      enable_monitoring: ${{parent.inputs.enable_monitoring}}
      table_name: ${{parent.inputs.table_name}}
    outputs:
      train_data: ${{parent.outputs.train_data}}
      val_data: ${{parent.outputs.val_data}}
      test_data: ${{parent.outputs.test_data}}

  train_model:
    name: train_model
    display_name: train-model
    code: ../../../data-science/src
    command: >-
      python train.py
      --train_data ${{inputs.train_data}}
      --model_output ${{outputs.model_output}}
    environment: azureml:taxi-train-env@latest
    inputs:
      train_data: ${{parent.jobs.prep_data.outputs.train_data}}
    outputs:
      model_output: ${{parent.outputs.trained_model}}

  evaluate_model:
    name: evaluate_model
    display_name: evaluate-model
    code: ../../../data-science/src
    command: >-
      python evaluate.py
      --model_name ${{inputs.model_name}}
      --model_input ${{inputs.model_input}}
      --test_data ${{inputs.test_data}}
      --evaluation_output ${{outputs.evaluation_output}}
    environment: azureml:taxi-train-env@latest
    inputs:
      model_name: "taxi-model"
      model_input: ${{parent.jobs.train_model.outputs.model_output}}
      test_data: ${{parent.jobs.prep_data.outputs.test_data}}
    outputs:
      evaluation_output: ${{parent.outputs.evaluation_output}}

  register_model:
    name: register_model
    display_name: register-model
    code: ../../../data-science/src
    command: >-
      python register.py
      --model_name ${{inputs.model_name}}
      --model_path ${{inputs.model_path}}
      --evaluation_output ${{inputs.evaluation_output}}
      --model_info_output_path ${{outputs.model_info_output_path}}
    environment: azureml:taxi-train-env@latest
    inputs:
      model_name: "taxi-model"
      model_path: ${{parent.jobs.train_model.outputs.model_output}}
      evaluation_output: ${{parent.jobs.evaluate_model.outputs.evaluation_output}}
    outputs:
      model_info_output_path: ${{parent.outputs.model_info_output_path}}
# </jobs>
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/azureml/train/train-env.yml:
-------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | name: taxi-train-env 3 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04 4 | conda_file: ../../../data-science/environment/train-conda.yml 5 | description: Environment created from a Docker image plus Conda environment to train taxi model. -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | name: deploy-batch-endpoint-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: aml-cli-v2 15 | - name: endpoint_name 16 | value: taxi-batch-$(namespace)$(postfix)$(environment) 17 | - name: endpoint_type 18 | value: batch 19 | 20 | trigger: none 21 | 22 | pool: 23 | vmImage: ubuntu-20.04 24 | 25 | resources: 26 | repositories: 27 | - repository: mlops-templates # Template Repo 28 | name: mlops-templates 29 | type: git 30 | ref: main 31 | 32 | stages: 33 | - stage: CreateBatchEndpoint 34 | displayName: Create/Update Batch Endpoint 35 | jobs: 36 | - job: DeployBatchEndpoint 37 | steps: 38 | - checkout: self 39 | path: s/ 40 | - task: Bash@3 41 | displayName: "Create checkout repository folder(s)" 42 | inputs: 43 | targetType: "inline" 44 | script: | 45 | set -e 46 | mkdir "$(Build.Repository.Name)" 47 | mkdir "mlops-templates" 48 | - checkout: mlops-templates 49 | path: s/templates/ 50 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 51 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 52 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 53 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 54 | parameters: 55 | cluster_name: batch-cluster # name must match cluster name in deployment file below 56 | size: STANDARD_DS3_V2 57 | min_instances: 0 58 | max_instances: 5 59 | cluster_tier: dedicated 60 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates 61 | parameters: 62 | endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml 63 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates 64 | parameters: 65 | deployment_name: taxi-batch-dp 66 | deployment_file: mlops/azureml/deploy/batch/batch-deployment.yml 67 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates 68 | parameters: 69 | deployment_name: taxi-batch-dp 70 | sample_request: data/taxi-batch.csv 71 | request_type: uri_file #either uri_folder or uri_file 72 | 73 | -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | name: deploy-model-training-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: aml-cli-v2 15 | 16 | trigger: none 17 | 18 | pool: 19 | vmImage: ubuntu-20.04 20 | 21 | resources: 22 | repositories: 23 | - repository: mlops-templates # Template Repo 24 | name: mlops-templates 25 | type: git 26 | ref: main 27 | 28 | stages: 29 | - stage: DeployTrainingPipeline 30 | displayName: Deploy Training Pipeline 31 | jobs: 32 | - job: DeployTrainingPipeline 33 | timeoutInMinutes: 120 # how long to run the job before automatically cancelling 34 | steps: 35 | - checkout: self 36 | path: s/ 37 | - task: Bash@3 38 | displayName: "Prevent repos dir warnings" 39 | inputs: 40 | targetType: "inline" 41 | script: | 42 | set -e 43 | mkdir "$(Build.Repository.Name)" 44 | mkdir "mlops-templates" 45 | - checkout: mlops-templates 46 | path: s/templates/ 47 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 48 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 49 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 50 | - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates 51 | parameters: 52 | environment_name: taxi-train-env 53 | environment_file: mlops/azureml/train/train-env.yml 54 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 55 | parameters: 56 | cluster_name: cpu-cluster 57 | size: Standard_DS3_v2 58 | min_instances: 0 59 | max_instances: 4 60 | cluster_tier: low_priority 61 | - template: templates/${{ variables.version }}/register-data.yml@mlops-templates 62 | parameters: 63 | data_type: uri_file 64 | data_name: taxi-data 65 | data_file: mlops/azureml/train/data.yml 66 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates 67 | parameters: 68 | pipeline_file: mlops/azureml/train/pipeline.yml 69 | experiment_name: $(environment)_taxi_fare_train_$(Build.SourceBranchName) 70 | display_name: $(environment)_taxi_fare_run_$(Build.BuildID) 71 | enable_monitoring: $(enable_monitoring) 72 | -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | name: deploy-online-endpoint-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: aml-cli-v2 15 | - name: endpoint_name 16 | value: taxi-online-$(namespace)$(postfix)$(environment) 17 | - name: endpoint_type 18 | value: online 19 | 20 | trigger: none 21 | 22 | pool: 23 | vmImage: ubuntu-20.04 24 | 25 | resources: 26 | repositories: 27 | - repository: mlops-templates # Template Repo 28 | name: mlops-templates 29 | type: git 30 | ref: main 31 | 32 | stages: 33 | - stage: CreateOnlineEndpoint 34 | displayName: Create/Update Online Endpoint 35 | jobs: 36 | - job: DeployOnlineEndpoint 37 | steps: 38 | - checkout: self 39 | path: s/ 40 | - task: Bash@3 41 | displayName: "Create checkout repository folder(s)" 42 | inputs: 43 | targetType: "inline" 44 | script: | 45 | set -e 46 | mkdir "$(Build.Repository.Name)" 47 | mkdir "mlops-templates" 48 | - checkout: mlops-templates 49 | path: s/templates/ 50 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 51 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 52 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 53 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates 54 | parameters: 55 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml 56 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates 57 | parameters: 58 | deployment_name: taxi-online-dp 59 | deployment_file: mlops/azureml/deploy/online/online-deployment.yml 60 | - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates 61 | parameters: 62 | traffic_allocation: taxi-online-dp=100 63 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates 64 | parameters: 65 | deployment_name: taxi-online-dp 66 | sample_request: data/taxi-request.json 67 | request_type: json 68 | -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/github-actions/deploy-batch-endpoint-pipeline-classical.yml: -------------------------------------------------------------------------------- 1 | name: deploy-batch-endpoint-pipeline 2 | 3 | on: 4 | workflow_dispatch: 5 | jobs: 6 | set-env-branch: 7 | runs-on: ubuntu-latest 8 | outputs: 9 | config-file: ${{ steps.set-output-defaults.outputs.config-file }} 10 | steps: 11 | - id: set-prod-branch 12 | name: set-prod-branch 13 | if: ${{ github.ref == 'refs/heads/main'}} 14 | run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV; 15 | - id: set-dev-branch 16 | name: setdevbranch 17 | if: ${{ github.ref != 'refs/heads/main'}} 18 | run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV; 19 | - id: set-output-defaults 20 | name: set-output-defaults 21 | run: | 22 | echo "config-file=$config_env" >> $GITHUB_OUTPUT; 23 | get-config: 24 | needs: set-env-branch 25 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main 26 | with: 27 | file_name: ${{ needs.set-env-branch.outputs.config-file}} 28 | create-compute: 29 | needs: get-config 30 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main 31 | with: 32 | cluster_name: batch-cluster 33 | size: 
STANDARD_DS3_V2 34 | min_instances: 0 35 | max_instances: 5 36 | cluster_tier: low_priority 37 | resource_group: ${{ needs.get-config.outputs.resource_group }} 38 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 39 | secrets: 40 | creds: ${{secrets.AZURE_CREDENTIALS}} 41 | create-endpoint: 42 | needs: [get-config, create-compute] 43 | uses: Azure/mlops-templates/.github/workflows/create-endpoint.yml@main 44 | with: 45 | resource_group: ${{ needs.get-config.outputs.resource_group }} 46 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 47 | endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml 48 | endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.bep) }} 49 | endpoint_type: batch 50 | secrets: 51 | creds: ${{secrets.AZURE_CREDENTIALS}} 52 | create-deployment: 53 | uses: Azure/mlops-templates/.github/workflows/create-deployment.yml@main 54 | needs: [get-config, create-endpoint] 55 | with: 56 | resource_group: ${{ needs.get-config.outputs.resource_group }} 57 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 58 | endpoint_file: mlops/azureml/deploy/batch/batch-deployment.yml 59 | endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.bep) }} 60 | endpoint_type: batch 61 | deployment_name: eptestdeploy 62 | secrets: 63 | creds: ${{secrets.AZURE_CREDENTIALS}} 64 | -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/github-actions/deploy-model-training-pipeline-classical.yml: -------------------------------------------------------------------------------- 1 | name: deploy-model-training-pipeline 2 | 3 | on: 4 | workflow_dispatch: 5 | jobs: 6 | set-env-branch: 7 | runs-on: ubuntu-latest 8 | outputs: 9 | config-file: ${{ steps.set-output-defaults.outputs.config-file }} 10 | steps: 11 | - id: set-prod-branch 12 | name: set-prod-branch 13 | if: ${{ github.ref == 'refs/heads/main'}} 14 | run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV; 15 | - id: set-dev-branch 16 | name: setdevbranch 17 | if: ${{ github.ref != 'refs/heads/main'}} 18 | run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV; 19 | - id: set-output-defaults 20 | name: set-output-defaults 21 | run: | 22 | echo "config-file=$config_env" >> $GITHUB_OUTPUT; 23 | get-config: 24 | needs: set-env-branch 25 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main 26 | with: 27 | file_name: ${{ needs.set-env-branch.outputs.config-file}} 28 | register-environment: 29 | needs: get-config 30 | uses: Azure/mlops-templates/.github/workflows/register-environment.yml@main 31 | with: 32 | resource_group: ${{ needs.get-config.outputs.resource_group }} 33 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 34 | environment_file: mlops/azureml/train/train-env.yml 35 | conda_file: data-science/environment/train-conda.yml 36 | secrets: 37 | creds: ${{secrets.AZURE_CREDENTIALS}} 38 | register-dataset: 39 | needs: get-config 40 | uses: Azure/mlops-templates/.github/workflows/register-dataset.yml@main 41 | with: 42 | resource_group: ${{ needs.get-config.outputs.resource_group }} 43 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 44 | name: taxi-data 45 | data_file: mlops/azureml/train/data.yml 46 | secrets: 47 | creds: ${{secrets.AZURE_CREDENTIALS}} 48 | create-compute: 49 | needs: [get-config] 50 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main 51 | with: 52 | cluster_name: cpu-cluster 53 | size: Standard_DS3_v2 54 | min_instances: 0 55 | 
max_instances: 4 56 | cluster_tier: low_priority 57 | resource_group: ${{ needs.get-config.outputs.resource_group }} 58 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 59 | secrets: 60 | creds: ${{secrets.AZURE_CREDENTIALS}} 61 | run-model-training-pipeline: 62 | needs: [get-config, register-environment, register-dataset, create-compute] 63 | uses: Azure/mlops-templates/.github/workflows/run-pipeline.yml@main 64 | with: 65 | resource_group: ${{ needs.get-config.outputs.resource_group }} 66 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 67 | parameters-file: mlops/azureml/train/pipeline.yml 68 | job-name: test 69 | secrets: 70 | creds: ${{secrets.AZURE_CREDENTIALS}} 71 | -------------------------------------------------------------------------------- /classical/aml-cli-v2/mlops/github-actions/deploy-online-endpoint-pipeline-classical.yml: -------------------------------------------------------------------------------- 1 | name: deploy-online-endpoint-pipeline 2 | 3 | on: 4 | workflow_dispatch: 5 | jobs: 6 | set-env-branch: 7 | runs-on: ubuntu-latest 8 | outputs: 9 | config-file: ${{ steps.set-output-defaults.outputs.config-file }} 10 | steps: 11 | - id: set-prod-branch 12 | name: set-prod-branch 13 | if: ${{ github.ref == 'refs/heads/main'}} 14 | run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV; 15 | - id: set-dev-branch 16 | name: setdevbranch 17 | if: ${{ github.ref != 'refs/heads/main'}} 18 | run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV; 19 | - id: set-output-defaults 20 | name: set-output-defaults 21 | run: | 22 | echo "config-file=$config_env" >> $GITHUB_OUTPUT; 23 | get-config: 24 | needs: set-env-branch 25 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main 26 | with: 27 | file_name: ${{ needs.set-env-branch.outputs.config-file}} 28 | create-endpoint: 29 | needs: get-config 30 | uses: Azure/mlops-templates/.github/workflows/create-endpoint.yml@main 31 | with: 32 | resource_group: ${{ needs.get-config.outputs.resource_group }} 33 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 34 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml 35 | endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }} 36 | endpoint_type: online 37 | secrets: 38 | creds: ${{secrets.AZURE_CREDENTIALS}} 39 | create-deployment: 40 | uses: Azure/mlops-templates/.github/workflows/create-deployment.yml@main 41 | needs: [get-config, create-endpoint] 42 | with: 43 | resource_group: ${{ needs.get-config.outputs.resource_group }} 44 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 45 | endpoint_file: mlops/azureml/deploy/online/online-deployment.yml 46 | endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }} 47 | endpoint_type: online 48 | deployment_name: taxi-online-dp 49 | secrets: 50 | creds: ${{secrets.AZURE_CREDENTIALS}} 51 | allocate-traffic: 52 | uses: Azure/mlops-templates/.github/workflows/allocate-traffic.yml@main 53 | needs: [get-config, create-deployment] 54 | with: 55 | resource_group: ${{ needs.get-config.outputs.resource_group }} 56 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 57 | traffic_allocation: taxi-online-dp=100 58 | endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }} 59 | secrets: 60 | creds: ${{secrets.AZURE_CREDENTIALS}} 61 | -------------------------------------------------------------------------------- /classical/python-sdk-v1/config-aml.yml: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | variables: 5 | 6 | ap_vm_image: ubuntu-20.04 7 | 8 | # Training pipeline settings 9 | 10 | # Training dataset settings 11 | training_dataset_name: uci-credit 12 | training_dataset_description: uci_credit 13 | training_dataset_local_path: data/training/ 14 | training_dataset_path_on_datastore: data/training/ 15 | training_dataset_type: local 16 | training_dataset_storage_url: 'https://azureaidemostorage.blob.core.windows.net/data/' 17 | 18 | # Training AzureML Environment name 19 | training_env_name: credit-training 20 | 21 | # Training AzureML Environment conda yaml 22 | training_env_conda_yaml: data-science/environment/train.yml 23 | 24 | # Name for the training pipeline 25 | training_pipeline_name: credit-training 26 | 27 | # Compute target for pipeline 28 | training_target: cpu-cluster 29 | training_target_sku: STANDARD_D2_V2 30 | training_target_min_nodes: 0 31 | training_target_max_nodes: 4 32 | 33 | # Training arguments specification 34 | training_arguments: '' 35 | 36 | # Training datasets specification 37 | # Syntax: <name>:<version>:<mode>:<steps (names separated by +)> 38 | training_datasets: uci-credit:1:download:prep 39 | 40 | # Name under which the model will be registered 41 | model_name: credit-ci 42 | 43 | # Batch pipeline settings 44 | 45 | # Batch scoring dataset settings 46 | scoring_dataset_name: credit-batch-input 47 | scoring_dataset_description: credit-batch-input 48 | scoring_dataset_local_path: data/scoring/ 49 | scoring_dataset_path_on_datastore: data/scoring/ 50 | scoring_dataset_type: local 51 | scoring_dataset_storage_url: 'https://azureaidemostorage.blob.core.windows.net/data/' 52 | 53 | # Batch AzureML Environment name 54 | batch_env_name: credit-batch 55 | 56 | # Batch AzureML Environment conda yaml 57 | batch_env_conda_yaml: data-science/environment/batch.yml 58 | 59 | # Name for the batch scoring pipeline 60 | batch_pipeline_name: credit-batch-scoring 61 | 62 | # Compute target for pipeline 63 | batch_target: cpu-cluster 64 | #not needed because batch uses the same target as training 65 | # batch_target_sku: STANDARD_D2_V2 66 | # batch_target_min_nodes: 0 67 | # batch_target_max_nodes: 4 68 | 69 | # Input batch dataset 70 | batch_input_dataset_name: credit-batch-input 71 | 72 | # Output dataset with results 73 | batch_output_dataset_name: credit-batch-output 74 | batch_output_path_on_datastore: credit-batch-scoring-results/{run-id} 75 | batch_output_filename: results.csv 76 | 77 | # Parallelization settings 78 | batch_mini_batch_size: 8 79 | batch_error_threshold: 1 80 | batch_process_count_per_node: 1 81 | batch_node_count: 1 82 | 83 | # Monitoring settings 84 | scoring_table_name: scoringdata 85 | training_table_name: mlmonitoring 86 | 87 | -------------------------------------------------------------------------------- /classical/python-sdk-v1/data-science/environment/batch.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | name: mnist-batch 5 | channels: 6 | - defaults 7 | - anaconda 8 | - conda-forge 9 | dependencies: 10 | - python=3.7.5 11 | - pip 12 | - pip: 13 | - azureml-defaults==1.38.0 14 | - azureml-mlflow==1.38.0 15 | - azureml-sdk==1.38.0 16 | - azureml-interpret==1.38.0 17 | - scikit-learn==0.24.1 18 | - pandas==1.2.1 19 | - joblib==1.0.0 20 | - matplotlib==3.3.3 -------------------------------------------------------------------------------- /classical/python-sdk-v1/data-science/environment/batch_monitor.yml: -------------------------------------------------------------------------------- 1 | name: batch-monitoring 2 | channels: 3 | - defaults 4 | - anaconda 5 | - conda-forge 6 | dependencies: 7 | - python=3.7.5 8 | - pip 9 | - pip: 10 | - azureml-defaults==1.38.0 11 | - azureml-mlflow==1.38.0 12 | - azureml-sdk==1.38.0 13 | - azureml-interpret==1.38.0 14 | - scikit-learn==0.24.1 15 | - pandas==1.2.1 16 | - joblib==1.0.0 17 | - matplotlib==3.3.3 18 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector 19 | -------------------------------------------------------------------------------- /classical/python-sdk-v1/data-science/environment/train.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | name: mnist-train 5 | channels: 6 | - defaults 7 | - anaconda 8 | - conda-forge 9 | dependencies: 10 | - python=3.7.5 11 | - pip 12 | - pip: 13 | - azureml-mlflow==1.38.0 14 | - azureml-sdk==1.38.0 15 | - scikit-learn==0.24.1 16 | - pandas==1.2.1 17 | - joblib==1.0.0 18 | - matplotlib==3.3.3 19 | - fairlearn==0.7.0 20 | - azureml-contrib-fairness==1.38.0 21 | - interpret-community==0.24.1 22 | - interpret-core==0.2.7 23 | - azureml-interpret==1.38.0 -------------------------------------------------------------------------------- /classical/python-sdk-v1/data-science/environment/train_monitor.yml: -------------------------------------------------------------------------------- 1 | name: train 2 | channels: 3 | - defaults 4 | - anaconda 5 | - conda-forge 6 | dependencies: 7 | - python=3.7.5 8 | - pip 9 | - pip: 10 | - azureml-mlflow==1.38.0 11 | - azureml-sdk==1.38.0 12 | - scikit-learn==0.24.1 13 | - pandas==1.2.1 14 | - joblib==1.0.0 15 | - matplotlib==3.3.3 16 | - fairlearn==0.7.0 17 | - azureml-contrib-fairness==1.38.0 18 | - interpret-community==0.24.1 19 | - interpret-core==0.2.7 20 | - azureml-interpret==1.38.0 21 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client 22 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector -------------------------------------------------------------------------------- /classical/python-sdk-v1/data-science/notebooks/experiment1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/classical/python-sdk-v1/data-science/notebooks/experiment1.ipynb -------------------------------------------------------------------------------- /classical/python-sdk-v1/data-science/src/prep.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | import os 5 | import sys 6 | import argparse 7 | import joblib 8 | import pandas as pd 9 | import numpy as np 10 | 11 | import mlflow 12 | import mlflow.sklearn 13 | 14 | from azureml.core import Run 15 | 16 | 17 | 18 | run = Run.get_context() 19 | ws = run.experiment.workspace 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser(description="UCI Credit example") 23 | parser.add_argument("--uci-credit", type=str, default='data/', help="Directory path to training data") 24 | parser.add_argument("--prepared_data_path", type=str, default='prepared_data/', help="prepared data directory") 25 | parser.add_argument("--enable_monitoring", type=str, default="false", help="enable logging to ADX") 26 | parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging") 27 | return parser.parse_known_args() 28 | 29 | def log_training_data(df, table_name): 30 | from obs.collector import Online_Collector 31 | from datetime import timedelta 32 | print("If there is an Authorization error, check your Azure KeyVault secret named kvmonitoringspkey. Terraform might put single quotation marks around the secret. Remove the single quotes and the secret should work.") 33 | collector = Online_Collector(table_name) 34 | df["timestamp"] = [pd.to_datetime('now') - timedelta(days=x) for x in range(len(df))] 35 | collector.batch_collect(df) 36 | 37 | 38 | def main(): 39 | # Parse command-line arguments 40 | args, unknown = parse_args() 41 | prepared_data_path = args.prepared_data_path 42 | 43 | # Make sure data output path exists 44 | if not os.path.exists(prepared_data_path): 45 | os.makedirs(prepared_data_path) 46 | 47 | # Enable auto logging 48 | mlflow.sklearn.autolog() 49 | 50 | # Read training data 51 | df = pd.read_csv(os.path.join(args.uci_credit, 'credit.csv')) 52 | 53 | random_data = np.random.rand(len(df)) 54 | 55 | msk_train = random_data < 0.7 56 | msk_val = (random_data >= 0.7) & (random_data < 0.85) 57 | msk_test = random_data >= 0.85 58 | 59 | train = df[msk_train] 60 | val = df[msk_val] 61 | test = df[msk_test] 62 | 63 | run.log('TRAIN SIZE', train.shape[0]) 64 | run.log('VAL SIZE', val.shape[0]) 65 | run.log('TEST SIZE', test.shape[0]) 66 | 67 | run.parent.log('TRAIN SIZE', train.shape[0]) 68 | run.parent.log('VAL SIZE', val.shape[0]) 69 | run.parent.log('TEST SIZE', test.shape[0]) 70 | 71 | TRAIN_PATH = os.path.join(prepared_data_path, "train.csv") 72 | VAL_PATH = os.path.join(prepared_data_path, "val.csv") 73 | TEST_PATH = os.path.join(prepared_data_path, "test.csv") 74 | 75 | train.to_csv(TRAIN_PATH, index=False) 76 | val.to_csv(VAL_PATH, index=False) 77 | test.to_csv(TEST_PATH, index=False) 78 | 79 | if (args.enable_monitoring.lower() == 'true' or args.enable_monitoring == '1' or args.enable_monitoring.lower() == 'yes'): 80 | log_training_data(df, args.table_name) 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /classical/python-sdk-v1/data-science/src/score.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License.
3 | 4 | import os 5 | import glob 6 | import json 7 | import argparse 8 | import numpy as np 9 | import pandas as pd 10 | import joblib 11 | from datetime import timedelta 12 | from azureml.core.model import Model 13 | 14 | model = None 15 | explainer = None 16 | collector = None 17 | 18 | 19 | def init(): 20 | global model, explainer, collector 21 | print("Started batch scoring by running init()") 22 | 23 | parser = argparse.ArgumentParser("batch_scoring") 24 | parser.add_argument("--model_name", type=str, help="Model to use for batch scoring") 25 | parser.add_argument( 26 | "--enable_monitoring", type=str, help="Enable Monitoring", default="false" 27 | ) 28 | parser.add_argument("--table_name", type=str, help="Table Name for logging data") 29 | args, _ = parser.parse_known_args() 30 | 31 | model_path = Model.get_model_path(args.model_name) 32 | print(f"Model path: {model_path}") 33 | 34 | if "model.pkl" in model_path: 35 | model = joblib.load(model_path) 36 | else: 37 | model = joblib.load(os.path.join(model_path, "model.pkl")) 38 | 39 | # load the explainer 40 | explainer_path = os.path.join(Model.get_model_path(args.model_name), "explainer") 41 | # explainer = joblib.load(explainer_path) 42 | 43 | if ( 44 | args.enable_monitoring.lower() == "true" 45 | or args.enable_monitoring == "1" 46 | or args.enable_monitoring.lower() == "yes" 47 | ): 48 | from obs.collector import Online_Collector 49 | 50 | collector = Online_Collector(args.table_name) 51 | 52 | 53 | def run(file_list): 54 | 55 | print(f"Files to process: {file_list}") 56 | results = pd.DataFrame( 57 | columns=["Sno", "ProbaGoodCredit", "ProbaBadCredit", "FeatureImportance"] 58 | ) 59 | all_results = [] 60 | for filename in file_list: 61 | 62 | df = pd.read_csv(filename) 63 | sno = df["Sno"] 64 | df = df.drop("Sno", axis=1) 65 | 66 | proba = model.predict_proba(df) 67 | proba = pd.DataFrame(data=proba, columns=["ProbaGoodCredit", "ProbaBadCredit"]) 68 | 69 | # explanation = explainer.explain_local(df) 70 | # sorted feature importance values and feature names 71 | # sorted_local_importance_names = explanation.get_ranked_local_names() 72 | # sorted_local_importance_values = explanation.get_ranked_local_values() 73 | # get explanations in dictionary 74 | # explanations = [] 75 | # for i, j in zip(sorted_local_importance_names[0], sorted_local_importance_values[0]): 76 | # explanations.append(dict(zip(i, j))) 77 | # explanation = pd.DataFrame(data=explanations, columns=["FeatureImportance"]) 78 | 79 | # result = pd.concat([sno, proba, explanation], axis=1) 80 | result = pd.concat([sno, proba], axis=1) 81 | results = results.append(result) 82 | all_results.append(pd.concat([df, proba], axis=1)) 83 | print(f"Batch scored: {filename}") 84 | 85 | if collector: 86 | full_results = pd.concat(all_results) 87 | full_results["timestamp"] = [ 88 | pd.to_datetime("now") - timedelta(days=x) for x in range(len(full_results)) 89 | ] 90 | collector.batch_collect(full_results) 91 | 92 | return results 93 | -------------------------------------------------------------------------------- /classical/python-sdk-v1/data-science/tests/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/classical/python-sdk-v1/data-science/tests/test.py -------------------------------------------------------------------------------- /classical/python-sdk-v1/mlops/devops-pipelines/deploy-drift-detection.yml:
-------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | variables: 5 | - template: ../../config-aml.yml 6 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 7 | # 'main' branch: PRD environment 8 | - template: ../../config-infra-prod.yml 9 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 10 | # 'develop' or feature branches: DEV environment 11 | - template: ../../config-infra-dev.yml 12 | - name: version 13 | value: python-sdk-v1 14 | 15 | trigger: 16 | - none 17 | 18 | pool: 19 | vmImage: $(ap_vm_image) 20 | 21 | stages: 22 | - stage: DeployDriftJob 23 | displayName: Deploy Drift Job 24 | jobs: 25 | - job: DeployDriftJob 26 | steps: 27 | - checkout: self 28 | path: s/ 29 | - checkout: mlops-templates 30 | path: s/templates/ 31 | - template: templates/${{ variables.version }}/deploy-drift-detection.yml@mlops-templates -------------------------------------------------------------------------------- /classical/python-sdk-v1/mlops/devops-pipelines/deploy-model-batch-scoring.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | variables: 5 | - template: ../../config-aml.yml 6 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 7 | # 'main' branch: PRD environment 8 | - template: ../../config-infra-prod.yml 9 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 10 | # 'develop' or feature branches: DEV environment 11 | - template: ../../config-infra-dev.yml 12 | - name: version 13 | value: python-sdk-v1 14 | 15 | trigger: none 16 | 17 | pool: 18 | vmImage: $(ap_vm_image) 19 | 20 | resources: 21 | repositories: 22 | - repository: mlops-templates # Template Repo 23 | name: mlops-templates 24 | type: git 25 | ref: main 26 | 27 | stages: 28 | - stage: DeployBatchScoringPipeline 29 | displayName: Deploy Batch Scoring Pipeline 30 | jobs: 31 | - job: DeployBatchScoringPipeline 32 | steps: 33 | - checkout: self 34 | path: s/ 35 | - task: Bash@3 36 | displayName: "Create checkout repository folder(s)" 37 | inputs: 38 | targetType: "inline" 39 | script: | 40 | set -e 41 | mkdir "$(Build.Repository.Name)" 42 | mkdir "mlops-templates" 43 | - checkout: mlops-templates 44 | path: s/templates/ 45 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 46 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 47 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 48 | - template: templates/${{ variables.version }}/create-environment.yml@mlops-templates 49 | parameters: 50 | environment_name: $(batch_env_name) 51 | build_type: "conda" 52 | environment_file: $(batch_env_conda_yaml) 53 | enable_monitoring: $(enable_monitoring) 54 | - template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates 55 | parameters: 56 | data_type: scoring 57 | - template: templates/${{ variables.version }}/deploy-batch-scoring-pipeline.yml@mlops-templates 58 | parameters: 59 | enable_monitoring: $(enable_monitoring) 60 | - template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates 61 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates 62 | -------------------------------------------------------------------------------- 
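Editor's note: the python-sdk-v1 pipelines above are driven by config-aml.yml, whose `training_datasets` entry packs four fields into a single string with the documented syntax `<name>:<version>:<mode>:<steps>` (step names joined by `+`). As a minimal sketch of how such an entry could be unpacked, the helper below is hypothetical and not part of this repo or the mlops-templates repo:

```python
# Hypothetical parser for the config-aml.yml dataset spec string
# '<name>:<version>:<mode>:<steps>' (steps separated by '+').
from dataclasses import dataclass
from typing import List


@dataclass
class DatasetSpec:
    name: str          # registered dataset name
    version: int       # dataset version number
    mode: str          # e.g. "download" or "mount"
    steps: List[str]   # pipeline step names that consume the dataset


def parse_dataset_spec(spec: str) -> DatasetSpec:
    """Parse one '<name>:<version>:<mode>:<steps>' entry."""
    name, version, mode, steps = spec.split(":")
    return DatasetSpec(name, int(version), mode, steps.split("+"))


print(parse_dataset_spec("uci-credit:1:download:prep"))
# DatasetSpec(name='uci-credit', version=1, mode='download', steps=['prep'])
```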
/classical/python-sdk-v1/mlops/devops-pipelines/deploy-model-training-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | name: deploy-model-training-pipeline 5 | 6 | variables: 7 | - template: ../../config-aml.yml 8 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 9 | # 'main' branch: PRD environment 10 | - template: ../../config-infra-prod.yml 11 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 12 | # 'develop' or feature branches: DEV environment 13 | - template: ../../config-infra-dev.yml 14 | - name: version 15 | value: python-sdk-v1 16 | 17 | trigger: none 18 | 19 | pool: 20 | vmImage: $(ap_vm_image) 21 | 22 | resources: 23 | repositories: 24 | - repository: mlops-templates # Template Repo 25 | name: mlops-templates 26 | type: git 27 | ref: main 28 | 29 | stages: 30 | - stage: DeployTrainingPipeline 31 | displayName: Deploy Training Pipeline 32 | jobs: 33 | - job: DeployTrainingPipeline 34 | steps: 35 | - checkout: self 36 | path: s/ 37 | - task: Bash@3 38 | displayName: "Create checkout repository folder(s)" 39 | inputs: 40 | targetType: "inline" 41 | script: | 42 | set -e 43 | mkdir "$(Build.Repository.Name)" 44 | mkdir "mlops-templates" 45 | - checkout: mlops-templates 46 | path: s/templates/ 47 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 48 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 49 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 50 | - template: templates/${{ variables.version }}/create-environment.yml@mlops-templates 51 | parameters: 52 | environment_name: $(training_env_name) 53 | build_type: "conda" 54 | environment_file: $(training_env_conda_yaml) 55 | enable_monitoring: $(enable_monitoring) 56 | - template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates 57 | parameters: 58 | data_type: training 59 | - template: templates/${{ variables.version }}/get-compute.yml@mlops-templates 60 | parameters: 61 | compute_type: training 62 | - template: templates/${{ variables.version }}/deploy-training-pipeline.yml@mlops-templates 63 | parameters: 64 | enable_monitoring: $(enable_monitoring) 65 | - template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates 66 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates 67 | -------------------------------------------------------------------------------- /classical/python-sdk-v2/data-science/environment/train-conda.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - defaults 3 | - anaconda 4 | - conda-forge 5 | dependencies: 6 | - python=3.7.5 7 | - pip 8 | - pip: 9 | - azureml-mlflow==1.38.0 10 | - azure-ai-ml==1.0.0 11 | - pyarrow==10.0.0 12 | - scikit-learn==0.24.1 13 | - pandas==1.2.1 14 | - joblib==1.0.0 15 | - matplotlib==3.3.3 16 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client 17 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector -------------------------------------------------------------------------------- /classical/python-sdk-v2/data-science/src/prep/prep.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 
2 | # Licensed under the MIT License. 3 | """ 4 | Prepares raw data and provides training, validation and test datasets 5 | """ 6 | 7 | import argparse 8 | 9 | from pathlib import Path 10 | import os 11 | import numpy as np 12 | import pandas as pd 13 | 14 | import mlflow 15 | 16 | TARGET_COL = "cost" 17 | 18 | NUMERIC_COLS = [ 19 | "distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude", 20 | "pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour", 21 | "pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday", 22 | "dropoff_hour", "dropoff_minute", "dropoff_second" 23 | ] 24 | 25 | CAT_NOM_COLS = [ 26 | "store_forward", "vendor" 27 | ] 28 | 29 | CAT_ORD_COLS = [ 30 | ] 31 | 32 | def parse_args(): 33 | '''Parse input arguments''' 34 | 35 | parser = argparse.ArgumentParser("prep") 36 | parser.add_argument("--raw_data", type=str, help="Path to raw data") 37 | parser.add_argument("--train_data", type=str, help="Path to train dataset") 38 | parser.add_argument("--val_data", type=str, help="Path to validation dataset") 39 | parser.add_argument("--test_data", type=str, help="Path to test dataset") 40 | 41 | parser.add_argument("--enable_monitoring", type=str, help="enable logging to ADX") 42 | parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging") 43 | 44 | args = parser.parse_args() 45 | 46 | return args 47 | 48 | def log_training_data(df, table_name): 49 | from obs.collector import Online_Collector 50 | collector = Online_Collector(table_name) 51 | collector.batch_collect(df) 52 | 53 | def main(args): 54 | '''Read, split, and save datasets''' 55 | 56 | # ------------ Reading Data ------------ # 57 | # -------------------------------------- # 58 | 59 | data = pd.read_csv((Path(args.raw_data))) 60 | data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]] 61 | 62 | # ------------- Split Data ------------- # 63 | # -------------------------------------- # 64 | 65 | # Split data into train, val and test datasets 66 | 67 | random_data = np.random.rand(len(data)) 68 | 69 | msk_train = random_data < 0.7 70 | msk_val = (random_data >= 0.7) & (random_data < 0.85) 71 | msk_test = random_data >= 0.85 72 | 73 | train = data[msk_train] 74 | val = data[msk_val] 75 | test = data[msk_test] 76 | 77 | mlflow.log_metric('train size', train.shape[0]) 78 | mlflow.log_metric('val size', val.shape[0]) 79 | mlflow.log_metric('test size', test.shape[0]) 80 | 81 | train.to_parquet((Path(args.train_data) / "train.parquet")) 82 | val.to_parquet((Path(args.val_data) / "val.parquet")) 83 | test.to_parquet((Path(args.test_data) / "test.parquet")) 84 | 85 | if (args.enable_monitoring.lower() == 'true' or args.enable_monitoring == '1' or args.enable_monitoring.lower() == 'yes'): 86 | log_training_data(data, args.table_name) 87 | 88 | 89 | if __name__ == "__main__": 90 | 91 | mlflow.start_run() 92 | 93 | # ---------- Parse Arguments ----------- # 94 | # -------------------------------------- # 95 | 96 | args = parse_args() 97 | 98 | lines = [ 99 | f"Raw data path: {args.raw_data}", 100 | f"Train dataset output path: {args.train_data}", 101 | f"Val dataset output path: {args.val_data}", 102 | f"Test dataset path: {args.test_data}", 103 | 104 | ] 105 | 106 | for line in lines: 107 | print(line) 108 | 109 | main(args) 110 | 111 | mlflow.end_run() 112 | 113 | 114 | --------------------------------------------------------------------------------
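Editor's note: prep.py above draws its split mask from `np.random.rand` without a fixed seed, so the 70/15/15 train/val/test split differs on every run. A seeded sketch of the same masking logic is shown below; the helper name and seed value are assumptions, not repo code:

```python
# Sketch only: a reproducible variant of the 70/15/15 masking used in prep.py.
import numpy as np
import pandas as pd


def split_frame(df: pd.DataFrame, seed: int = 42):
    rng = np.random.default_rng(seed)       # seeded generator => repeatable splits
    r = rng.random(len(df))                 # one uniform draw per row
    train = df[r < 0.7]                     # ~70% train
    val = df[(r >= 0.7) & (r < 0.85)]       # ~15% validation
    test = df[r >= 0.85]                    # ~15% test
    return train, val, test


train, val, test = split_frame(pd.DataFrame({"cost": range(100)}))
print(len(train), len(val), len(test))  # roughly 70 / 15 / 15, identical on reruns
```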
/classical/python-sdk-v2/data-science/src/register/register.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | """ 4 | Registers trained ML model if deploy flag is True. 5 | """ 6 | 7 | import argparse 8 | from pathlib import Path 9 | import pickle 10 | import mlflow 11 | 12 | import os 13 | import json 14 | 15 | def parse_args(): 16 | '''Parse input arguments''' 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--model_name', type=str, help='Name under which model will be registered') 20 | parser.add_argument('--model_path', type=str, help='Model directory') 21 | parser.add_argument('--evaluation_output', type=str, help='Path of eval results') 22 | parser.add_argument( 23 | "--model_info_output_path", type=str, help="Path to write model info JSON" 24 | ) 25 | args, _ = parser.parse_known_args() 26 | print(f'Arguments: {args}') 27 | 28 | return args 29 | 30 | 31 | def main(args): 32 | '''Loads model, registers it if deploy flag is True''' 33 | 34 | with open((Path(args.evaluation_output) / "deploy_flag"), 'rb') as infile: 35 | deploy_flag = int(infile.read()) 36 | 37 | mlflow.log_metric("deploy flag", int(deploy_flag)) 38 | 39 | if deploy_flag==1: 40 | 41 | print("Registering ", args.model_name) 42 | 43 | # load model 44 | model = mlflow.sklearn.load_model(args.model_path) 45 | 46 | # log model using mlflow 47 | mlflow.sklearn.log_model(model, args.model_name) 48 | 49 | # register logged model using mlflow 50 | run_id = mlflow.active_run().info.run_id 51 | model_uri = f'runs:/{run_id}/{args.model_name}' 52 | mlflow_model = mlflow.register_model(model_uri, args.model_name) 53 | model_version = mlflow_model.version 54 | 55 | # write model info 56 | print("Writing JSON") 57 | model_info = {"id": "{0}:{1}".format(args.model_name, model_version)} 58 | output_path = os.path.join(args.model_info_output_path, "model_info.json") 59 | with open(output_path, "w") as of: 60 | json.dump(model_info, fp=of) 61 | 62 | else: 63 | print("Model will not be registered!") 64 | 65 | if __name__ == "__main__": 66 | 67 | mlflow.start_run() 68 | 69 | # ---------- Parse Arguments ----------- # 70 | # -------------------------------------- # 71 | 72 | args = parse_args() 73 | 74 | lines = [ 75 | f"Model name: {args.model_name}", 76 | f"Model path: {args.model_path}", 77 | f"Evaluation output path: {args.evaluation_output}", 78 | ] 79 | 80 | for line in lines: 81 | print(line) 82 | 83 | main(args) 84 | 85 | mlflow.end_run() 86 | -------------------------------------------------------------------------------- /classical/python-sdk-v2/data/taxi-request.json: -------------------------------------------------------------------------------- 1 | {"input_data": [[2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57], 2 | [3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35]] 3 | } -------------------------------------------------------------------------------- /classical/python-sdk-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License.
3 | 4 | name: deploy-batch-endpoint-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: python-sdk-v2 15 | - name: endpoint_name 16 | value: taxi-batch-$(namespace)$(postfix)$(environment) 17 | 18 | trigger: none 19 | 20 | pool: 21 | vmImage: ubuntu-20.04 22 | 23 | resources: 24 | repositories: 25 | - repository: mlops-templates # Template Repo 26 | name: mlops-templates 27 | type: git 28 | ref: main 29 | 30 | stages: 31 | - stage: CreateBatchEndpoint 32 | displayName: Create/Update Batch Endpoint 33 | jobs: 34 | - job: DeployBatchEndpoint 35 | steps: 36 | - checkout: self 37 | path: s/ 38 | - task: Bash@3 39 | displayName: "Create checkout repository folder(s)" 40 | inputs: 41 | targetType: "inline" 42 | script: | 43 | set -e 44 | mkdir "$(Build.Repository.Name)" 45 | mkdir "mlops-templates" 46 | - checkout: mlops-templates 47 | path: s/templates/ 48 | - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates 49 | - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates 50 | - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates 51 | - template: templates/aml-cli-v2/create-compute.yml@mlops-templates 52 | parameters: 53 | cluster_name: batch-cluster # name must match cluster name in deployment file below 54 | size: STANDARD_DS3_V2 55 | min_instances: 0 56 | max_instances: 5 57 | cluster_tier: dedicated 58 | - template: templates/${{ variables.version }}/create-batch-endpoint.yml@mlops-templates 59 | parameters: 60 | endpoint_name: "${{ variables.endpoint_name }}" 61 | endpoint_description: "Taxi batch endpoint" 62 | auth_mode: "aad_token" 63 | - template: templates/${{ variables.version }}/create-batch-deployment.yml@mlops-templates 64 | parameters: 65 | deployment_name: taxi-batch-dp 66 | deployment_description: "Taxi batch deployment" 67 | endpoint_name: "${{ variables.endpoint_name }}" 68 | model_path: "taxi-model@latest" 69 | compute: batch-cluster 70 | - template: templates/${{ variables.version }}/test-batch-endpoint.yml@mlops-templates 71 | parameters: 72 | endpoint_name: "${{ variables.endpoint_name }}" 73 | sample_request: data/taxi-batch.csv 74 | request_type: uri_file #either uri_folder or uri_file 75 | -------------------------------------------------------------------------------- /classical/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | name: deploy-model-training-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: python-sdk-v2 15 | 16 | trigger: none 17 | 18 | pool: 19 | vmImage: ubuntu-20.04 20 | 21 | resources: 22 | repositories: 23 | - repository: mlops-templates # Template Repo 24 | name: mlops-templates 25 | type: git 26 | ref: main 27 | 28 | stages: 29 | - stage: DeployTrainingPipeline 30 | displayName: Deploy Training Pipeline 31 | jobs: 32 | - job: DeployTrainingPipeline 33 | timeoutInMinutes: 120 # how long to run the job before automatically cancelling 34 | steps: 35 | - checkout: self 36 | path: s/ 37 | - task: Bash@3 38 | displayName: "Create checkout repository folder(s)" 39 | inputs: 40 | targetType: "inline" 41 | script: | 42 | set -e 43 | mkdir "$(Build.Repository.Name)" 44 | mkdir "mlops-templates" 45 | - checkout: mlops-templates 46 | path: s/templates/ 47 | - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates 48 | - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates 49 | - template: templates/python-sdk-v2/install-requirements.yml@mlops-templates 50 | - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates 51 | - template: templates/aml-cli-v2/create-compute.yml@mlops-templates 52 | parameters: 53 | cluster_name: cpu-cluster 54 | size: Standard_DS3_v2 55 | min_instances: 0 56 | max_instances: 4 57 | cluster_tier: low_priority 58 | - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates 59 | parameters: 60 | environment_name: taxi-train-env 61 | environment_description: "Training Environment for Taxi Pipeline" 62 | environment_path: data-science/environment/train-conda.yml 63 | build_type: conda 64 | - template: templates/${{ variables.version }}/register-data-asset.yml@mlops-templates 65 | parameters: 66 | data_name: taxi-data 67 | data_description: taxi-training-dataset 68 | data_path: data/taxi-data.csv 69 | data_type: uri_file 70 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates 71 | parameters: 72 | pipeline_path: mlops/azureml/train/run_pipeline.py 73 | experiment_name: taxi-train-pipeline 74 | data_name: taxi-data 75 | environment_name: taxi-train-env 76 | compute_name: cpu-cluster 77 | enable_monitoring: $(enable_monitoring) 78 | table_name: "taximonitoring" 79 | -------------------------------------------------------------------------------- /classical/python-sdk-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | name: deploy-online-endpoint-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: python-sdk-v2 15 | - name: endpoint_name 16 | value: taxi-online-$(namespace)$(postfix)$(environment) 17 | 18 | trigger: none 19 | 20 | pool: 21 | vmImage: ubuntu-20.04 22 | 23 | resources: 24 | repositories: 25 | - repository: mlops-templates # Template Repo 26 | name: mlops-templates 27 | type: git 28 | ref: main 29 | 30 | stages: 31 | - stage: CreateOnlineEndpoint 32 | displayName: Create/Update Online Endpoint 33 | jobs: 34 | - job: DeployOnlineEndpoint 35 | steps: 36 | - checkout: self 37 | path: s/ 38 | - task: Bash@3 39 | displayName: "Create checkout repository folder(s)" 40 | inputs: 41 | targetType: "inline" 42 | script: | 43 | set -e 44 | mkdir "$(Build.Repository.Name)" 45 | mkdir "mlops-templates" 46 | - checkout: mlops-templates 47 | path: s/templates/ 48 | - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates 49 | - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates 50 | - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates 51 | - template: templates/${{ variables.version }}/create-online-endpoint.yml@mlops-templates 52 | parameters: 53 | endpoint_name: "${{ variables.endpoint_name }}" 54 | endpoint_description: "Taxi Online Endpoint" 55 | auth_mode: "aml_token" 56 | - template: templates/${{ variables.version }}/create-online-deployment.yml@mlops-templates 57 | parameters: 58 | deployment_name: taxi-online-dp 59 | endpoint_name: "${{ variables.endpoint_name }}" 60 | model_path: "taxi-model@latest" 61 | traffic_allocation: 100 62 | - template: templates/${{ variables.version }}/test-online-endpoint.yml@mlops-templates 63 | parameters: 64 | endpoint_name: "${{ variables.endpoint_name }}" 65 | sample_request: data/taxi-request.json 66 | -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/data-science/environment/train-conda.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - defaults 3 | - anaconda 4 | - conda-forge 5 | dependencies: 6 | - python=3.7.5 7 | - pip 8 | - pip: 9 | - azureml-mlflow==1.38.0 10 | - azureml-sdk==1.38.0 11 | - scikit-learn==0.24.1 12 | - pandas==1.2.1 13 | - joblib==1.0.0 14 | - matplotlib==3.3.3 15 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client 16 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/data-science/environment/train-requirements.txt: -------------------------------------------------------------------------------- 1 | azureml-mlflow==1.38.0 2 | pyarrow==10.0.0 3 | scikit-learn==0.24.1 4 | pandas==1.2.1 5 | joblib==1.2.0 6 | matplotlib==3.3.3 -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/data-science/experiment/requirements.txt: -------------------------------------------------------------------------------- 1 | azureml-mlflow==1.38.0 2 | scikit-learn==0.24.1 3 | pandas==1.2.1 4 | joblib==1.2.0 5 | matplotlib==3.3.3 
-------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/data-science/src/prep/prep.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | """ 4 | Prepares raw data and provides training, validation and test datasets 5 | """ 6 | 7 | import argparse 8 | 9 | from pathlib import Path 10 | import os 11 | import numpy as np 12 | import pandas as pd 13 | 14 | import mlflow 15 | 16 | TARGET_COL = "cost" 17 | 18 | NUMERIC_COLS = [ 19 | "distance", 20 | "dropoff_latitude", 21 | "dropoff_longitude", 22 | "passengers", 23 | "pickup_latitude", 24 | "pickup_longitude", 25 | "pickup_weekday", 26 | "pickup_month", 27 | "pickup_monthday", 28 | "pickup_hour", 29 | "pickup_minute", 30 | "pickup_second", 31 | "dropoff_weekday", 32 | "dropoff_month", 33 | "dropoff_monthday", 34 | "dropoff_hour", 35 | "dropoff_minute", 36 | "dropoff_second", 37 | ] 38 | 39 | CAT_NOM_COLS = [ 40 | "store_forward", 41 | "vendor", 42 | ] 43 | 44 | CAT_ORD_COLS = [ 45 | ] 46 | 47 | def parse_args(): 48 | '''Parse input arguments''' 49 | 50 | parser = argparse.ArgumentParser("prep") 51 | parser.add_argument("--raw_data", type=str, help="Path to raw data") 52 | parser.add_argument("--train_data", type=str, help="Path to train dataset") 53 | parser.add_argument("--val_data", type=str, help="Path to test dataset") 54 | parser.add_argument("--test_data", type=str, help="Path to test dataset") 55 | 56 | parser.add_argument("--enable_monitoring", type=str, help="enable logging to ADX") 57 | parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging") 58 | 59 | args = parser.parse_args() 60 | 61 | return args 62 | 63 | def log_training_data(df, table_name): 64 | from obs.collector import Online_Collector 65 | collector = Online_Collector(table_name) 66 | collector.batch_collect(df) 67 | 68 | def main(args): 69 | '''Read, split, and save datasets''' 70 | 71 | # ------------ Reading Data ------------ # 72 | # -------------------------------------- # 73 | 74 | data = pd.read_csv((Path(args.raw_data))) 75 | data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]] 76 | 77 | # ------------- Split Data ------------- # 78 | # -------------------------------------- # 79 | 80 | # Split data into train, val and test datasets 81 | 82 | random_data = np.random.rand(len(data)) 83 | 84 | msk_train = random_data < 0.7 85 | msk_val = (random_data >= 0.7) & (random_data < 0.85) 86 | msk_test = random_data >= 0.85 87 | 88 | train = data[msk_train] 89 | val = data[msk_val] 90 | test = data[msk_test] 91 | 92 | mlflow.log_metric('train size', train.shape[0]) 93 | mlflow.log_metric('val size', val.shape[0]) 94 | mlflow.log_metric('test size', test.shape[0]) 95 | 96 | train.to_parquet((Path(args.train_data) / "train.parquet")) 97 | val.to_parquet((Path(args.val_data) / "val.parquet")) 98 | test.to_parquet((Path(args.test_data) / "test.parquet")) 99 | 100 | if (args.enable_monitoring.lower() == 'true' or args.enable_monitoring == '1' or args.enable_monitoring.lower() == 'yes'): 101 | log_training_data(data, args.table_name) 102 | 103 | 104 | if __name__ == "__main__": 105 | 106 | mlflow.start_run() 107 | 108 | # ---------- Parse Arguments ----------- # 109 | # -------------------------------------- # 110 | 111 | args = parse_args() 112 | 113 | lines = [ 114 | f"Raw data path: {args.raw_data}", 115 | f"Train dataset 
output path: {args.train_data}", 116 | f"Val dataset output path: {args.val_data}", 117 | f"Test dataset path: {args.test_data}", 118 | 119 | ] 120 | 121 | for line in lines: 122 | print(line) 123 | 124 | main(args) 125 | 126 | mlflow.end_run() 127 | 128 | 129 | -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/data-science/src/register/register.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | """ 4 | Registers trained ML model if deploy flag is True. 5 | """ 6 | 7 | import argparse 8 | from pathlib import Path 9 | import pickle 10 | import mlflow 11 | 12 | import os 13 | import json 14 | 15 | def parse_args(): 16 | '''Parse input arguments''' 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--model_name', type=str, help='Name under which model will be registered') 20 | parser.add_argument('--model_path', type=str, help='Model directory') 21 | parser.add_argument('--evaluation_output', type=str, help='Path of eval results') 22 | parser.add_argument( 23 | "--model_info_output_path", type=str, help="Path to write model info JSON" 24 | ) 25 | args, _ = parser.parse_known_args() 26 | print(f'Arguments: {args}') 27 | 28 | return args 29 | 30 | 31 | def main(args): 32 | '''Loads model, registers it if deploy flag is True''' 33 | 34 | with open((Path(args.evaluation_output) / "deploy_flag"), 'rb') as infile: 35 | deploy_flag = int(infile.read()) 36 | 37 | mlflow.log_metric("deploy flag", int(deploy_flag)) 38 | 39 | if deploy_flag==1: 40 | 41 | print("Registering ", args.model_name) 42 | 43 | # load model 44 | model = mlflow.sklearn.load_model(args.model_path) 45 | 46 | # log model using mlflow 47 | mlflow.sklearn.log_model(model, args.model_name) 48 | 49 | # register logged model using mlflow 50 | run_id = mlflow.active_run().info.run_id 51 | model_uri = f'runs:/{run_id}/{args.model_name}' 52 | mlflow_model = mlflow.register_model(model_uri, args.model_name) 53 | model_version = mlflow_model.version 54 | 55 | # write model info 56 | print("Writing JSON") 57 | model_info = {"id": "{0}:{1}".format(args.model_name, model_version)} 58 | output_path = os.path.join(args.model_info_output_path, "model_info.json") 59 | with open(output_path, "w") as of: 60 | json.dump(model_info, fp=of) 61 | 62 | else: 63 | print("Model will not be registered!") 64 | 65 | if __name__ == "__main__": 66 | 67 | mlflow.start_run() 68 | 69 | # ---------- Parse Arguments ----------- # 70 | # -------------------------------------- # 71 | 72 | args = parse_args() 73 | 74 | lines = [ 75 | f"Model name: {args.model_name}", 76 | f"Model path: {args.model_path}", 77 | f"Evaluation output path: {args.evaluation_output}", 78 | ] 79 | 80 | for line in lines: 81 | print(line) 82 | 83 | main(args) 84 | 85 | mlflow.end_run() 86 | -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/data/taxi-request.json: -------------------------------------------------------------------------------- 1 | {"input_data": [[2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57], 2 | [3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35]] 3 | } -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/azureml/deploy/batch/batch-deployment.yml:
-------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json 2 | name: batch-dp 3 | endpoint_name: taxi-fare-batch 4 | model: azureml:taxi-model@latest 5 | compute: azureml:batch-cluster 6 | resources: 7 | instance_count: 1 8 | max_concurrency_per_instance: 2 9 | mini_batch_size: 10 10 | output_action: append_row 11 | output_file_name: predictions.csv 12 | retry_settings: 13 | max_retries: 3 14 | timeout: 30 15 | error_threshold: -1 16 | logging_level: info -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/azureml/deploy/batch/batch-endpoint.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/batchEndpoint.schema.json 2 | name: taxi-fare-batch 3 | description: taxi cost batch endpoint 4 | auth_mode: aad_token -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/azureml/deploy/online/online-deployment.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json 2 | name: blue 3 | endpoint_name: taxi-fare-online 4 | model: azureml:taxi-model@latest 5 | instance_type: Standard_DS2_v2 6 | instance_count: 1 7 | -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/azureml/deploy/online/online-endpoint.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json 2 | name: taxi-fare-online 3 | description: taxi cost online endpoint 4 | auth_mode: key -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/azureml/deploy/online/score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/classical/rai-aml-cli-v2/mlops/azureml/deploy/online/score.py -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/azureml/train/train-env.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | name: taxi-train-env 3 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04 4 | conda_file: ../../../data-science/environment/train-conda.yml 5 | description: Environment created from a Docker image plus Conda environment to train taxi model. -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | name: deploy-batch-endpoint-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: aml-cli-v2 15 | - name: endpoint_name 16 | value: taxi-batch-$(namespace)$(postfix)$(environment) 17 | - name: endpoint_type 18 | value: batch 19 | 20 | trigger: none 21 | 22 | pool: 23 | vmImage: ubuntu-20.04 24 | 25 | resources: 26 | repositories: 27 | - repository: mlops-templates # Template Repo 28 | name: mlops-templates 29 | type: git 30 | ref: main 31 | 32 | stages: 33 | - stage: CreateBatchEndpoint 34 | displayName: Create/Update Batch Endpoint 35 | jobs: 36 | - job: DeployBatchEndpoint 37 | steps: 38 | - checkout: self 39 | path: s/ 40 | - task: Bash@3 41 | displayName: "Create checkout repository folder(s)" 42 | inputs: 43 | targetType: "inline" 44 | script: | 45 | set -e 46 | mkdir "$(Build.Repository.Name)" 47 | mkdir "mlops-templates" 48 | - checkout: mlops-templates 49 | path: s/templates/ 50 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 51 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 52 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 53 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 54 | parameters: 55 | cluster_name: batch-cluster # name must match cluster name in deployment file below 56 | size: STANDARD_DS3_V2 57 | min_instances: 0 58 | max_instances: 5 59 | cluster_tier: dedicated 60 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates 61 | parameters: 62 | endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml 63 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates 64 | parameters: 65 | deployment_name: taxi-batch-dp 66 | deployment_file: mlops/azureml/deploy/batch/batch-deployment.yml 67 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates 68 | parameters: 69 | deployment_name: taxi-batch-dp 70 | sample_request: data/taxi-batch.csv 71 | request_type: uri_file #either uri_folder or uri_file 72 | 73 | -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | name: deploy-online-endpoint-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: aml-cli-v2 15 | - name: endpoint_name 16 | value: taxi-online-$(namespace)$(postfix)$(environment) 17 | - name: endpoint_type 18 | value: online 19 | 20 | trigger: none 21 | 22 | pool: 23 | vmImage: ubuntu-20.04 24 | 25 | resources: 26 | repositories: 27 | - repository: mlops-templates # Template Repo 28 | name: mlops-templates 29 | type: git 30 | ref: main 31 | 32 | stages: 33 | - stage: CreateOnlineEndpoint 34 | displayName: Create/Update Online Endpoint 35 | jobs: 36 | - job: DeployOnlineEndpoint 37 | steps: 38 | - checkout: self 39 | path: s/ 40 | - task: Bash@3 41 | displayName: "Create checkout repository folder(s)" 42 | inputs: 43 | targetType: "inline" 44 | script: | 45 | set -e 46 | mkdir "$(Build.Repository.Name)" 47 | mkdir "mlops-templates" 48 | - checkout: mlops-templates 49 | path: s/templates/ 50 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 51 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 52 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 53 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates 54 | parameters: 55 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml 56 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates 57 | parameters: 58 | deployment_name: taxi-online-dp 59 | deployment_file: mlops/azureml/deploy/online/online-deployment.yml 60 | - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates 61 | parameters: 62 | traffic_allocation: taxi-online-dp=100 63 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates 64 | parameters: 65 | deployment_name: taxi-online-dp 66 | sample_request: data/taxi-request.json 67 | request_type: json 68 | -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/devops-pipelines/register-rai-components.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - task: AzureCLI@2 3 | displayName: Register RAI components 4 | continueOnError: true 5 | inputs: 6 | azureSubscription: $(ado_service_connection_rg) #needs to have access at the RG level 7 | scriptType: bash 8 | workingDirectory: $(System.DefaultWorkingDirectory) 9 | scriptLocation: inlineScript 10 | inlineScript: | 11 | subscription_id=$(az account list --query "[?isDefault].id | [0]" --output tsv) 12 | chmod u+x quick-setup.bash 13 | bash quick-setup.bash conda-env $subscription_id $(resource_group) $(aml_workspace) 14 | -------------------------------------------------------------------------------- /classical/rai-aml-cli-v2/mlops/devops-pipelines/trigger_.code-search: -------------------------------------------------------------------------------- 1 | # Query: trigger: 2 | # ContextLines: 1 3 | 4 | 20 results - 20 files 5 | 6 | classical/aml-cli-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml: 7 | 17 8 | 18: trigger: none 9 | 10 | classical/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: 11 | 
13 12 | 14: trigger: none 13 | 14 | classical/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: 15 | 17 16 | 18: trigger: none 17 | 18 | classical/python-sdk-v1/mlops/devops-pipelines/deploy-drift-detection.yml: 19 | 14 20 | 15: trigger: none 21 | 22 | classical/python-sdk-v1/mlops/devops-pipelines/deploy-model-batch-scoring.yml: 23 | 14 24 | 15: trigger: none 25 | 26 | classical/python-sdk-v1/mlops/devops-pipelines/deploy-model-training-pipeline.yml: 27 | 14 28 | 15: trigger: none 29 | 30 | classical/python-sdk-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml: 31 | 15 32 | 16: trigger: none 33 | 34 | classical/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: 35 | 13 36 | 14: trigger: none 37 | 38 | classical/python-sdk-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: 39 | 15 40 | 16: trigger: none 41 | 42 | classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml: 43 | 17 44 | 18: trigger: none 45 | 46 | classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: 47 | 13 48 | 14: trigger: none 49 | 50 | classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: 51 | 17 52 | 18: trigger: none 53 | 54 | cv/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: 55 | 13 56 | 14: trigger: none 57 | 58 | cv/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: 59 | 17 60 | 18: trigger: none 61 | 62 | cv/python-sdk-v1/mlops/devops-pipelines/deploy-model-training-pipeline.yml: 63 | 14 64 | 15: trigger: none 65 | 66 | infrastructure/bicep/pipelines/bicep-ado-deploy-infra.yml: 67 | 11 68 | 12: trigger: none 69 | 70 | infrastructure/terraform/devops-pipelines/tf-ado-deploy-infra.yml: 71 | 11 72 | 12: trigger: none 73 | 74 | nlp/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: 75 | 13 76 | 14: trigger: none 77 | 78 | nlp/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: 79 | 17 80 | 18: trigger: none 81 | 82 | nlp/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: 83 | 13 84 | 14: trigger: none 85 | -------------------------------------------------------------------------------- /config-infra-dev.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | # Dev environment 5 | variables: 6 | # Global 7 | ap_vm_image: ubuntu-20.04 8 | 9 | namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters. 
10 | postfix: 0001 11 | location: eastus 12 | environment: dev 13 | enable_aml_computecluster: true 14 | enable_monitoring: false 15 | 16 | # Azure DevOps 17 | ado_service_connection_rg: Azure-ARM-Dev 18 | ado_service_connection_aml_ws: Azure-ARM-Dev 19 | 20 | # DO NOT TOUCH 21 | 22 | # For pipeline reference 23 | resource_group: rg-$(namespace)-$(postfix)$(environment) 24 | aml_workspace: mlw-$(namespace)-$(postfix)$(environment) 25 | application_insights: mlw-$(namespace)-$(postfix)$(environment) 26 | key_vault: kv-$(namespace)-$(postfix)$(environment) 27 | container_registry: cr$(namespace)$(postfix)$(environment) 28 | storage_account: st$(namespace)$(postfix)$(environment) 29 | 30 | # For terraform reference 31 | terraform_version: 1.3.6 32 | terraform_workingdir: infrastructure 33 | terraform_st_location: $(location) 34 | terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf 35 | terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf 36 | terraform_st_container_name: default 37 | terraform_st_key: mlops-tab 38 | -------------------------------------------------------------------------------- /config-infra-prod.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | # Prod environment 5 | variables: 6 | # Global 7 | ap_vm_image: ubuntu-20.04 8 | 9 | namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters. 10 | postfix: 0001 11 | location: eastus 12 | environment: prod 13 | enable_aml_computecluster: true 14 | enable_monitoring: false 15 | 16 | # Azure DevOps 17 | ado_service_connection_rg: Azure-ARM-Prod 18 | ado_service_connection_aml_ws: Azure-ARM-Prod 19 | 20 | # DO NOT TOUCH 21 | 22 | # For pipeline reference 23 | resource_group: rg-$(namespace)-$(postfix)$(environment) 24 | aml_workspace: mlw-$(namespace)-$(postfix)$(environment) 25 | application_insights: mlw-$(namespace)-$(postfix)$(environment) 26 | key_vault: kv-$(namespace)-$(postfix)$(environment) 27 | container_registry: cr$(namespace)$(postfix)$(environment) 28 | storage_account: st$(namespace)$(postfix)$(environment) 29 | 30 | # For terraform reference 31 | terraform_version: 1.3.6 32 | terraform_workingdir: infrastructure 33 | terraform_st_location: $(location) 34 | terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf 35 | terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf 36 | terraform_st_container_name: default 37 | terraform_st_key: mlops-tab 38 | -------------------------------------------------------------------------------- /cv/README.md: -------------------------------------------------------------------------------- 1 | # Computer Vision 2 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/environment/Dockerfile: -------------------------------------------------------------------------------- 1 | # check release notes https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html 2 | FROM nvcr.io/nvidia/pytorch:22.04-py3 3 | 4 | ############################################################################## 5 | # NCCL TESTS 6 | ############################################################################## 7 | ENV NCCL_TESTS_TAG=v2.11.0 8 | 9 | # NOTE: adding gencodes to support K80, M60, V100, A100 10 | RUN mkdir 
/tmp/nccltests && \ 11 | cd /tmp/nccltests && \ 12 | git clone -b ${NCCL_TESTS_TAG} https://github.com/NVIDIA/nccl-tests.git && \ 13 | cd nccl-tests && \ 14 | make \ 15 | MPI=1 MPI_HOME=/opt/hpcx/ompi \ 16 | NVCC_GENCODE="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_80,code=sm_80" \ 17 | CUDA_HOME=/usr/local/cuda && \ 18 | cp ./build/* /usr/local/bin && \ 19 | rm -rf /tmp/nccltests 20 | 21 | # Install dependencies missing in this container 22 | # NOTE: container already has matplotlib==3.5.1 tqdm==4.62.0 23 | COPY requirements.txt ./ 24 | RUN pip install -r requirements.txt 25 | 26 | # RUN python -m pip install azureml-defaults==1.41.0 \ 27 | # mlflow==1.25.1 \ 28 | # azureml-mlflow==1.41.0 \ 29 | # transformers==4.18.0 \ 30 | # psutil==5.9.0 31 | 32 | # add ndv4-topo.xml 33 | RUN mkdir /opt/microsoft/ 34 | ADD ./ndv4-topo.xml /opt/microsoft 35 | 36 | # to use on A100, enable env var below in your job 37 | # ENV NCCL_TOPO_FILE="/opt/microsoft/ndv4-topo.xml" 38 | 39 | # adjusts the level of info from NCCL tests 40 | ENV NCCL_DEBUG="INFO" 41 | ENV NCCL_DEBUG_SUBSYS="GRAPH,INIT,ENV" 42 | 43 | # Relaxed Ordering can greatly help the performance of Infiniband networks in virtualized environments. 44 | ENV NCCL_IB_PCI_RELAXED_ORDERING="1" 45 | ENV CUDA_DEVICE_ORDER="PCI_BUS_ID" 46 | ENV NCCL_SOCKET_IFNAME="eth0" 47 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/environment/ndv4-topo.xml: -------------------------------------------------------------------------------- 1 | <!-- This topology file was copied from https://github.com/Azure/azhpc-images/blob/master/common/network-tuning.sh --> 2 | <system version="1"> 3 | <cpu numaid="0" affinity="0000ffff,0000ffff" arch="x86_64" vendor="AuthenticAMD" familyid="23" modelid="49"> 4 | <pci busid="ffff:ff:01.0" class="0x060400" link_speed="16 GT/s" link_width="16"> 5 | <pci busid="0001:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/> 6 | <pci busid="0101:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/> 7 | <pci busid="0002:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/> 8 | <pci busid="0102:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/> 9 | </pci> 10 | </cpu> 11 | <cpu numaid="1" affinity="0000ffff,0000ffff" arch="x86_64" vendor="AuthenticAMD" familyid="23" modelid="49"> 12 | <pci busid="ffff:ff:02.0" class="0x060400" link_speed="16 GT/s" link_width="16"> 13 | <pci busid="0003:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/> 14 | <pci busid="0103:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/> 15 | <pci busid="0004:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/> 16 | <pci busid="0104:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/> 17 | </pci> 18 | </cpu> 19 | <cpu numaid="2" affinity="0000ffff,0000ffff" arch="x86_64" vendor="AuthenticAMD" familyid="23" modelid="49"> 20 | <pci busid="ffff:ff:03.0" class="0x060400" link_speed="16 GT/s" link_width="16"> 21 | <pci busid="000b:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/> 22 | <pci busid="0105:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/> 23 | <pci busid="000c:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/> 24 | <pci busid="0106:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/> 25 | </pci> 26 
| </cpu> 27 | <cpu numaid="3" affinity="0000ffff,0000ffff" arch="x86_64" vendor="AuthenticAMD" familyid="23" modelid="49"> 28 | <pci busid="ffff:ff:04.0" class="0x060400" link_speed="16 GT/s" link_width="16"> 29 | <pci busid="000d:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/> 30 | <pci busid="0107:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/> 31 | <pci busid="000e:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/> 32 | <pci busid="0108:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/> 33 | </pci> 34 | </cpu> 35 | </system> 36 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/environment/requirements.txt: -------------------------------------------------------------------------------- 1 | # for local testing (cpu) 2 | torchvision==0.14.1 # matched to torch 1.13.1 below (torchvision 0.12.0 requires torch 1.11.0 and is unresolvable against this pin) 3 | torch==1.13.1 4 | transformers==4.18.0 5 | 6 | # for metrics reporting/plotting 7 | mlflow==2.3.1 8 | azureml-mlflow==1.41.0 9 | matplotlib==3.5.2 10 | tqdm==4.64.0 11 | psutil==5.9.0 12 | 13 | # for unit testing 14 | pytest==7.1.2 15 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/requirements-tests.txt: -------------------------------------------------------------------------------- 1 | # NOTE: install these requirements to run the unit tests 2 | 3 | # CV packages 4 | torchvision==0.14.1 # matched to torch 1.13.1 below (see requirements.txt) 5 | torch==1.13.1 6 | transformers==4.18.0 7 | 8 | # for metrics reporting/plotting 9 | mlflow==2.3.1 10 | azureml-mlflow==1.41.0 11 | matplotlib==3.5.2 12 | tqdm==4.64.0 13 | psutil==5.9.0 14 | 15 | # for unit testing 16 | pytest==7.1.2 17 | pytest-cov==2.12.1 18 | 19 | # Fix: force protobuf downgrade to avoid exception 20 | protobuf==3.20.2 21 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/src/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_loader import MODEL_ARCH_LIST, get_model_metadata, load_model 2 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/src/model/swin_models.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | # Original Author: Jeff Omhover (MSFT) 4 | 5 | """ 6 | This script provides code to load and set up Swin Transformer image-classification models from HuggingFace transformers. 7 | """ 8 | import logging 9 | 10 | import torch 11 | from transformers import SwinConfig, SwinForImageClassification 12 | 13 | 14 | def load_swin_model( 15 | model_arch: str, output_dimension: int = 1, pretrained: bool = True 16 | ): 17 | """Loads a model from a given arch and sets it up for training""" 18 | logger = logging.getLogger(__name__) 19 | 20 | logger.info( 21 | f"Loading model from arch={model_arch} pretrained={pretrained} output_dimension={output_dimension}" 22 | ) 23 | if pretrained: 24 | model = SwinForImageClassification.from_pretrained(model_arch) 25 | else: 26 | model = SwinForImageClassification(config=SwinConfig()) 27 | 28 | model.classifier = torch.nn.Linear(model.swin.num_features, output_dimension) 29 | 30 | return model 31 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/src/model/test_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT license. 3 | # Original Author: Jeff Omhover (MSFT) 4 | 5 | """ 6 | Creates a super simple 32x32 CNN model for testing. 7 | From the CIFAR10 tutorial https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html 8 | """ 9 | import logging 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | class Net(nn.Module): 17 | def __init__(self, output_dimension): 18 | super().__init__() 19 | self.conv1 = nn.Conv2d(3, 6, 5) 20 | self.pool = nn.MaxPool2d(2, 2) 21 | self.conv2 = nn.Conv2d(6, 16, 5) 22 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 23 | self.fc2 = nn.Linear(120, 84) 24 | self.fc3 = nn.Linear(84, output_dimension) 25 | 26 | def forward(self, x): 27 | x = self.pool(F.relu(self.conv1(x))) 28 | x = self.pool(F.relu(self.conv2(x))) 29 | x = torch.flatten(x, 1) # flatten all dimensions except batch 30 | x = F.relu(self.fc1(x)) 31 | x = F.relu(self.fc2(x)) 32 | x = self.fc3(x) 33 | return x 34 | 35 | 36 | def load_test_model( 37 | model_arch: str, output_dimension: int = 1, pretrained: bool = True 38 | ): 39 | """Returns the toy CNN regardless of model_arch; pretrained is ignored (signature kept consistent with the other load_* helpers so the model loader can dispatch uniformly)""" 40 | return Net(output_dimension) 41 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/src/model/torchvision_models.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | # Original Author: Jeff Omhover (MSFT) 4 | 5 | """ 6 | This script provides code to load and set up a variety of models from torchvision.models. 7 | """ 8 | import logging 9 | 10 | import torch 11 | import torchvision.models as models 12 | 13 | 14 | def load_torchvision_model( 15 | model_arch: str, output_dimension: int = 1, pretrained: bool = True 16 | ): 17 | """Loads a model from a given arch and sets it up for training""" 18 | logger = logging.getLogger(__name__) 19 | 20 | logger.info( 21 | f"Loading model from arch={model_arch} pretrained={pretrained} output_dimension={output_dimension}" 22 | ) 23 | if hasattr(models, model_arch): 24 | model = getattr(models, model_arch)(pretrained=pretrained) 25 | else: 26 | raise NotImplementedError( 27 | f"model_arch={model_arch} is not implemented in torchvision model zoo." 28 | ) 29 | 30 | # see https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html 31 | if model_arch.startswith("resnet"): 32 | model.fc = torch.nn.Linear(model.fc.in_features, output_dimension) 33 | elif model_arch == "alexnet": 34 | model.classifier[6] = torch.nn.Linear(4096, output_dimension) 35 | elif model_arch.startswith("vgg"): 36 | model.classifier[6] = torch.nn.Linear(4096, output_dimension) 37 | elif model_arch.startswith("densenet"): 38 | model.classifier = torch.nn.Linear(model.classifier.in_features, output_dimension) # in_features varies across densenet variants (1024 holds only for densenet121) 39 | else: 40 | raise NotImplementedError( 41 | f"loading model_arch={model_arch} is not implemented yet in our custom code."
42 | ) 43 | 44 | return model 45 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | import pytest 5 | import tempfile 6 | from unittest.mock import Mock 7 | 8 | SRC_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")) 9 | 10 | if SRC_ROOT not in sys.path: 11 | logging.info(f"Adding {SRC_ROOT} to path") 12 | sys.path.append(str(SRC_ROOT)) 13 | 14 | 15 | @pytest.fixture() 16 | def temporary_dir(): 17 | """Creates a temporary directory for the tests""" 18 | temp_directory = tempfile.TemporaryDirectory() 19 | yield temp_directory.name 20 | temp_directory.cleanup() 21 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/data-science/tests/model/test_model_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests running the model loader for every possible model in the list 3 | """ 4 | import pytest 5 | import torch 6 | 7 | # local imports 8 | from model import ( 9 | MODEL_ARCH_LIST, 10 | get_model_metadata, 11 | load_model, 12 | ) 13 | 14 | # IMPORTANT: see conftest.py for fixtures 15 | 16 | 17 | @pytest.mark.parametrize("model_arch", MODEL_ARCH_LIST) 18 | def test_model_loader(model_arch): 19 | """Tests the model loader in data-science/src/model/ (conftest.py puts src/ on sys.path)""" 20 | model_metadata = get_model_metadata(model_arch) 21 | 22 | assert model_metadata is not None 23 | assert isinstance(model_metadata, dict) 24 | assert "library" in model_metadata 25 | assert "input_size" in model_metadata 26 | 27 | # using pretrained=False to avoid downloading each time we unit test 28 | model = load_model(model_arch, output_dimension=4, pretrained=False) 29 | 30 | assert model is not None 31 | assert isinstance(model, torch.nn.Module) 32 | 33 | 34 | def test_model_loader_failure(): 35 | """Test asking for a model that doesn't exist""" 36 | with pytest.raises(NotImplementedError): 37 | get_model_metadata("not_a_model") 38 | 39 | with pytest.raises(NotImplementedError): 40 | load_model("not_a_model", output_dimension=4, pretrained=False) 41 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/mlops/azureml/deploy/online/online-deployment.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json 2 | name: blue 3 | endpoint_name: dogs-classifier-online 4 | model: azureml:resnet-dogs-classifier@latest 5 | instance_type: Standard_DS2_v2 6 | instance_count: 1 7 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/mlops/azureml/deploy/online/online-endpoint.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json 2 | name: dogs-classifier-online 3 | description: Stanford Dogs Classifier 4 | auth_mode: key -------------------------------------------------------------------------------- /cv/aml-cli-v2/mlops/azureml/train/create_stanford_dogs_dataset.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json 2 | 3 | command: | 4 | tar xvfm ${{inputs.archive}} --no-same-owner -C
${{outputs.images}} #TODO: Split data into Train-Validate-Test 5 | 6 | inputs: 7 | archive: 8 | type: uri_file 9 | path: http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar 10 | 11 | outputs: 12 | images: 13 | type: uri_folder 14 | mode: upload 15 | path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/dogs/ 16 | 17 | environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest 18 | 19 | compute: azureml:cpu-cluster 20 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/mlops/azureml/train/pipeline.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json 2 | type: pipeline 3 | 4 | # <inputs_and_outputs> 5 | inputs: 6 | training_images: 7 | type: uri_folder 8 | mode: download # pick ro_mount, rw_mount or download 9 | path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/dogs/** 10 | # path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/places2/train/** 11 | validation_images: #TODO: Use different datasets for validation 12 | type: uri_folder 13 | mode: download # pick ro_mount, rw_mount or download 14 | path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/dogs/** 15 | # path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/places2/valid/** 16 | # </inputs_and_outputs> 17 | 18 | # <jobs> 19 | settings: 20 | default_datastore: azureml:workspaceblobstore 21 | continue_on_step_failure: true 22 | 23 | jobs: 24 | train: 25 | type: command 26 | component: file:train.yaml 27 | compute: azureml:gpu-cluster 28 | resources: 29 | instance_count: 1 # number of nodes 30 | distribution: 31 | type: pytorch 32 | process_count_per_instance: 1 # number of GPUs per node 33 | 34 | # NOTE: set env var if needed 35 | environment_variables: 36 | NCCL_DEBUG: "INFO" # adjusts the verbosity of NCCL logging 37 | 38 | # NCCL_TOPO_FILE: "/opt/microsoft/ndv4-topo.xml" # Use specific topology file for A100 39 | 40 | # NCCL_IB_PCI_RELAXED_ORDERING: "1" # Relaxed Ordering can greatly help the performance of Infiniband networks in virtualized environments. 41 | # NCCL_IB_DISABLE: "1" # force disable infiniband (if set to "1") 42 | # NCCL_NET_PLUGIN: "none" # to force NET/Plugin off (no rdma/sharp plugin at all) 43 | # NCCL_NET: "Socket" # to force node-to-node comm to use Socket (slow) 44 | # NCCL_SOCKET_IFNAME: "eth0" # to force Socket comm to use eth0 (use NCCL_NET=Socket) 45 | 46 | # UCX_IB_PCI_RELAXED_ORDERING: "on" 47 | # UCX_TLS: "tcp" 48 | # UCX_NET_DEVICES: "eth0" # if you have Error: Failed to resolve UCX endpoint...
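# NOTE: NCCL_DEBUG and NCCL_DEBUG_SUBSYS already have defaults baked into the training image (see data-science/environment/Dockerfile); values set under environment_variables here override those image defaults at job runtime.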
49 | 50 | # CUDA_DEVICE_ORDER: "PCI_BUS_ID" # ordering of gpus 51 | 52 | # TORCH_DISTRIBUTED_DEBUG: "DETAIL" 53 | 54 | inputs: 55 | # data inputs 56 | train_images: ${{parent.inputs.training_images}} 57 | valid_images: ${{parent.inputs.validation_images}} 58 | 59 | # data loading 60 | batch_size: 64 61 | num_workers: 5 62 | prefetch_factor: 4 63 | persistent_workers: true 64 | pin_memory: true 65 | non_blocking: false 66 | 67 | # model 68 | model_arch: "resnet18" 69 | model_arch_pretrained: true 70 | 71 | # training 72 | num_epochs: 1 73 | learning_rate: 0.001 74 | momentum: 0.9 75 | 76 | # profiling 77 | enable_profiling: false 78 | # multiprocessing_sharing_strategy: "file_system" # WARNING: this can cause hang at job completion 79 | 80 | # Model Registration 81 | register_model_as: "resnet-dogs-classifier" 82 | 83 | # </jobs> 84 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/mlops/azureml/train/train-env.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | name: nvidia_pytorch 3 | build: 4 | path: ../../../data-science/environment/ 5 | tags: 6 | os: ubuntu 7 | os_version: 20.04 8 | hpcx: 2.10 9 | mpi: openmpi 10 | mpi_version: 4.1.2rc4 11 | ucx: 1.12.0 12 | cuda: 11.6.2 13 | cudnn: 8.4.0.27 14 | nccl: 2.12.10 15 | rdma_core: 36.0 16 | nsight_compute: 2022.1.1.2 17 | nsight_systems: "2022.2.1.31-5fe97ab" 18 | nccl_test: 2.11.0 19 | azureml-defaults: 1.41.0 20 | mlflow: 1.25.1 21 | transformers: 4.18.0 -------------------------------------------------------------------------------- /cv/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License.
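# This pipeline provisions the GPU training cluster, registers the training environment (a docker build from train-env.yaml) and the Stanford Dogs dataset, then submits mlops/azureml/train/pipeline.yaml as an AzureML pipeline job. It assumes the Azure/mlops-templates repository has been imported into the Azure DevOps project under the name 'mlops-templates' (see the resources section below).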
3 | 4 | name: deploy-model-training-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: aml-cli-v2 15 | 16 | trigger: none 17 | 18 | pool: 19 | vmImage: ubuntu-20.04 20 | 21 | resources: 22 | repositories: 23 | - repository: mlops-templates # Template Repo 24 | name: mlops-templates 25 | type: git 26 | ref: main 27 | 28 | stages: 29 | - stage: DeployTrainingPipeline 30 | displayName: Deploy Training Pipeline 31 | jobs: 32 | - job: DeployTrainingPipeline 33 | steps: 34 | - checkout: self 35 | path: s/ 36 | - task: Bash@3 37 | displayName: "Create checkout repository folder(s)" 38 | inputs: 39 | targetType: "inline" 40 | script: | 41 | set -e 42 | mkdir "$(Build.Repository.Name)" 43 | mkdir "mlops-templates" 44 | - checkout: mlops-templates 45 | path: s/templates/ 46 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 47 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 48 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 49 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 50 | parameters: 51 | cluster_name: gpu-cluster 52 | size: Standard_NC6 53 | min_instances: 0 54 | max_instances: 1 55 | cluster_tier: dedicated 56 | - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates 57 | parameters: 58 | build_type: docker 59 | environment_name: nvidia_pytorch # Not used for docker builds 60 | environment_file: mlops/azureml/train/train-env.yaml 61 | - template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates 62 | parameters: 63 | data_type: training 64 | environment_file: mlops/azureml/train/create_stanford_dogs_dataset.yaml 65 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates 66 | parameters: 67 | pipeline_file: mlops/azureml/train/pipeline.yaml 68 | experiment_name: $(environment)_cv_train_$(Build.SourceBranchName) 69 | display_name: $(environment)_cv_run_$(Build.BuildID) 70 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
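# This pipeline creates or updates the managed online endpoint, rolls out the 'dogs-online-dp' deployment, routes 100% of traffic to it, and smoke-tests it with data/sample-request.json; like the training pipeline, it assumes the Azure/mlops-templates repository is available as 'mlops-templates'.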
3 | 4 | variables: 5 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 6 | # 'main' branch: PRD environment 7 | - template: ../../config-infra-prod.yml 8 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 9 | # 'develop' or feature branches: DEV environment 10 | - template: ../../config-infra-dev.yml 11 | - name: version 12 | value: aml-cli-v2 13 | - name: endpoint_name 14 | value: dogs-online-$(namespace)$(postfix)$(environment) 15 | - name: endpoint_type 16 | value: online 17 | 18 | trigger: none 19 | 20 | pool: 21 | vmImage: ubuntu-20.04 22 | 23 | resources: 24 | repositories: 25 | - repository: mlops-templates # Template Repo 26 | name: mlops-templates 27 | type: git 28 | ref: main 29 | 30 | stages: 31 | - stage: CreateOnlineEndpoint 32 | displayName: Create/Update Online Endpoint 33 | jobs: 34 | - job: DeployOnlineEndpoint 35 | steps: 36 | - checkout: self 37 | path: s/ 38 | - task: Bash@3 39 | displayName: "Create checkout repository folder(s)" 40 | inputs: 41 | targetType: "inline" 42 | script: | 43 | set -e 44 | mkdir "$(Build.Repository.Name)" 45 | mkdir "mlops-templates" 46 | - checkout: mlops-templates 47 | path: s/templates/ 48 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 49 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 50 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 51 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates 52 | parameters: 53 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml 54 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates 55 | parameters: 56 | deployment_name: dogs-online-dp 57 | deployment_file: mlops/azureml/deploy/online/online-deployment.yml 58 | - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates 59 | parameters: 60 | traffic_allocation: dogs-online-dp=100 61 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates 62 | parameters: 63 | deployment_name: dogs-online-dp 64 | sample_request: data/sample-request.json 65 | request_type: json 66 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/mlops/github-actions/deploy-model-training-pipeline.yml: -------------------------------------------------------------------------------- 1 | name: deploy-cv-model-training-pipeline 2 | 3 | on: 4 | workflow_dispatch: 5 | jobs: 6 | set-env-branch: 7 | runs-on: ubuntu-latest 8 | outputs: 9 | config-file: ${{ steps.set-output-defaults.outputs.config-file }} 10 | steps: 11 | - id: set-prod-branch 12 | name: set-prod-branch 13 | if: ${{ github.ref == 'refs/heads/main'}} 14 | run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV; 15 | - id: set-dev-branch 16 | name: setdevbranch 17 | if: ${{ github.ref != 'refs/heads/main'}} 18 | run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV; 19 | - id: set-output-defaults 20 | name: set-output-defaults 21 | run: | 22 | echo "config-file=$config_env" >> $GITHUB_OUTPUT; 23 | get-config: 24 | needs: set-env-branch 25 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main 26 | with: 27 | file_name: ${{ needs.set-env-branch.outputs.config-file}} 28 | create-dataprep-compute: 29 | needs: [get-config] 30 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main 31 | with: 32 | cluster_name: cpu-cluster 33 | size: Standard_DS3_v2 34 | min_instances: 0 35 | 
max_instances: 4 36 | cluster_tier: low_priority 37 | resource_group: ${{ needs.get-config.outputs.resource_group }} 38 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 39 | secrets: 40 | creds: ${{secrets.AZURE_CREDENTIALS}} 41 | create-training-compute: 42 | needs: get-config 43 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main 44 | with: 45 | cluster_name: gpu-cluster 46 | size: Standard_NC6 47 | min_instances: 0 48 | max_instances: 1 49 | cluster_tier: low_priority 50 | resource_group: ${{ needs.get-config.outputs.resource_group }} 51 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 52 | secrets: 53 | creds: ${{secrets.AZURE_CREDENTIALS}} 54 | register-environment: 55 | needs: [get-config, create-dataprep-compute, create-training-compute] 56 | uses: Azure/mlops-templates/.github/workflows/register-environment.yml@main 57 | with: 58 | resource_group: ${{ needs.get-config.outputs.resource_group }} 59 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 60 | environment_file: mlops/azureml/train/train-env.yaml 61 | secrets: 62 | creds: ${{secrets.AZURE_CREDENTIALS}} 63 | register-dataset: 64 | needs: [get-config, register-environment] 65 | uses: Azure/mlops-templates/.github/workflows/register-dataset.yml@main 66 | with: 67 | resource_group: ${{ needs.get-config.outputs.resource_group }} 68 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 69 | data_file: mlops/azureml/train/create_stanford_dogs_dataset.yaml 70 | file_type: Training 71 | name: stanford_dogs 72 | secrets: 73 | creds: ${{secrets.AZURE_CREDENTIALS}} 74 | run-model-training-pipeline: 75 | needs: 76 | [ 77 | get-config, 78 | create-dataprep-compute, 79 | create-training-compute, 80 | register-environment, 81 | register-dataset, 82 | ] 83 | uses: Azure/mlops-templates/.github/workflows/run-pipeline.yml@main 84 | with: 85 | resource_group: ${{ needs.get-config.outputs.resource_group }} 86 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 87 | parameters-file: mlops/azureml/train/pipeline.yaml 88 | job-name: cv-train 89 | secrets: 90 | creds: ${{secrets.AZURE_CREDENTIALS}} 91 | -------------------------------------------------------------------------------- /cv/aml-cli-v2/mlops/github-actions/deploy-online-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | name: deploy-online-endpoint-pipeline 2 | 3 | on: 4 | workflow_dispatch: 5 | jobs: 6 | set-env-branch: 7 | runs-on: ubuntu-latest 8 | outputs: 9 | config-file: ${{ steps.set-output-defaults.outputs.config-file }} 10 | steps: 11 | - id: set-prod-branch 12 | name: set-prod-branch 13 | if: ${{ github.ref == 'refs/heads/main'}} 14 | run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV; 15 | - id: set-dev-branch 16 | name: setdevbranch 17 | if: ${{ github.ref != 'refs/heads/main'}} 18 | run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV; 19 | - id: set-output-defaults 20 | name: set-output-defaults 21 | run: | 22 | echo "config-file=$config_env" >> $GITHUB_OUTPUT; 23 | get-config: 24 | needs: set-env-branch 25 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main 26 | with: 27 | file_name: ${{ needs.set-env-branch.outputs.config-file}} 28 | create-endpoint: 29 | needs: get-config 30 | uses: Azure/mlops-templates/.github/workflows/create-endpoint.yml@main 31 | with: 32 | resource_group: ${{ needs.get-config.outputs.resource_group }} 33 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 34 | 
endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml 35 | endpoint_name: dogs-classifier-online2 36 | endpoint_type: online 37 | secrets: 38 | creds: ${{secrets.AZURE_CREDENTIALS}} 39 | create-deployment: 40 | uses: Azure/mlops-templates/.github/workflows/create-deployment.yml@main 41 | needs: [get-config, create-endpoint] 42 | with: 43 | resource_group: ${{ needs.get-config.outputs.resource_group }} 44 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 45 | endpoint_file: mlops/azureml/deploy/online/online-deployment.yml 46 | endpoint_name: dogs-classifier-online2 47 | endpoint_type: online 48 | deployment_name: dogs-online-dp 49 | secrets: 50 | creds: ${{secrets.AZURE_CREDENTIALS}} 51 | allocate-traffic: 52 | uses: Azure/mlops-templates/.github/workflows/allocate-traffic.yml@main 53 | needs: [get-config, create-deployment] 54 | with: 55 | resource_group: ${{ needs.get-config.outputs.resource_group }} 56 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 57 | traffic_allocation: dogs-online-dp=100 58 | endpoint_name: dogs-classifier-online2 59 | secrets: 60 | creds: ${{secrets.AZURE_CREDENTIALS}} 61 | -------------------------------------------------------------------------------- /cv/python-sdk-v1/config-aml.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | 3 | ap_vm_image: ubuntu-20.04 4 | 5 | ## Training pipeline settings 6 | 7 | # Training dataset settings 8 | training_dataset_name: dogs-imgs 9 | training_dataset_description: 'Stanford Dogs Dataset (http://vision.stanford.edu/aditya86/ImageNetDogs/)' 10 | training_dataset_local_path: data/training-imgs/ 11 | training_dataset_path_on_datastore: dogs-imgs/ 12 | training_dataset_type: local 13 | training_dataset_storage_url: 'http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar' 14 | 15 | labels_dataset_name: dogs-labels 16 | labels_dataset_description: 'Labels for Stanford Dogs Dataset (http://vision.stanford.edu/aditya86/ImageNetDogs/)' 17 | labels_dataset_local_path: data/training/ 18 | labels_dataset_path_on_datastore: dogs-labels/ 19 | labels_dataset_type: local 20 | 21 | # Training AzureML Environment settings 22 | training_env_name: nvidia_pytorch 23 | training_env_path: data-science/environment/training/ 24 | 25 | # Compute target for pipeline 26 | training_target: gpu-cluster 27 | training_target_sku: Standard_NC6 28 | training_target_min_nodes: 0 29 | training_target_max_nodes: 1 30 | 31 | # Name for the training pipeline 32 | training_pipeline_name: resnet-dogs-training-pipeline 33 | training_experiment_name: resnet-dogs-training 34 | 35 | # Training arguments specification 36 | training_arguments: --epochs 2 --batch-size 64 --training-mode feature-extraction 37 | 38 | # Training datasets specification 39 | # Syntax: <name>:<version>:<mode>:<steps (names separated by +)> 40 | training_datasets: dogs-labels:1:download:prep dogs-imgs:latest:mount:train+eval 41 | 42 | # Name under which the model will be registered 43 | model_name: resnet-dogs-classifier 44 | -------------------------------------------------------------------------------- /cv/python-sdk-v1/data-science/environment/training/azureml_environment.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pytorch_manual", 3 | "environmentVariables": { 4 | "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE" 5 | }, 6 | "python": { 7 | "userManagedDependencies": false, 8 | "interpreterPath": "python", 9 | "condaDependenciesFile": null, 10 | 
"baseCondaEnvironment": null 11 | }, 12 | "docker": { 13 | "enabled": true, 14 | "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04", 15 | "baseDockerfile": null, 16 | "sharedVolumes": true, 17 | "shmSize": "2g", 18 | "arguments": [], 19 | "baseImageRegistry": { 20 | "address": null, 21 | "username": null, 22 | "password": null, 23 | "registryIdentity": null 24 | }, 25 | "platform": { 26 | "os": "Linux", 27 | "architecture": "amd64" 28 | } 29 | }, 30 | "spark": { 31 | "repositories": [], 32 | "packages": [], 33 | "precachePackages": true 34 | }, 35 | "databricks": { 36 | "mavenLibraries": [], 37 | "pypiLibraries": [], 38 | "rcranLibraries": [], 39 | "jarLibraries": [], 40 | "eggLibraries": [] 41 | }, 42 | "r": null, 43 | "inferencingStackVersion": null 44 | } -------------------------------------------------------------------------------- /cv/python-sdk-v1/data-science/environment/training/conda_dependencies.yml: -------------------------------------------------------------------------------- 1 | name: pytorch_manual 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.7 6 | - pip=20.2.4 7 | - pip: 8 | - pandas==1.3.5 9 | - scikit-learn==1.0.2 10 | - matplotlib==3.5.2 11 | - msrest==0.6.21 12 | - mlflow==1.27.0 13 | - azureml-core==1.43.0 14 | - azureml-defaults==1.43.0 15 | - azureml-mlflow==1.43.0 16 | - torch==1.11.0 17 | - torchvision==0.12.0 18 | -------------------------------------------------------------------------------- /cv/python-sdk-v1/data-science/notebooks/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/cv/python-sdk-v1/data-science/notebooks/.gitkeep -------------------------------------------------------------------------------- /cv/python-sdk-v1/data-science/src/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | from .dataset import CustomImageDataset 5 | from .net import load_model 6 | -------------------------------------------------------------------------------- /cv/python-sdk-v1/data-science/src/model/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | import os 5 | import PIL 6 | 7 | from torch.utils.data import Dataset 8 | import torchvision.transforms as transforms 9 | 10 | 11 | class CustomImageDataset(Dataset): 12 | def __init__(self, img_dir, img_labels, mode='test'): 13 | self.img_dir = img_dir 14 | self.img_labels = img_labels 15 | self.classes = img_labels.unique().tolist() 16 | 17 | self.mode = mode 18 | if self.mode == 'train': 19 | self.transform = transforms.Compose([ 20 | transforms.RandomResizedCrop(224), 21 | transforms.RandomHorizontalFlip(), 22 | transforms.ToTensor() 23 | ]) 24 | else: 25 | self.transform = transforms.Compose([ 26 | transforms.Resize(256), 27 | transforms.CenterCrop(224), 28 | transforms.ToTensor(), 29 | ]) 30 | 31 | def __len__(self): 32 | return len(self.img_labels) 33 | 34 | def __getitem__(self, idx): 35 | 36 | img_path = self.img_labels.index[idx] 37 | image = PIL.Image.open(os.path.join(self.img_dir, img_path)).convert('RGB') 38 | image = self.transform(image) 39 | 40 | img_label = self.img_labels[idx] 41 | img_class = self.classes.index(img_label) 42 | 43 | return image, img_class, img_path 44 | 45 | def nclasses(self): 46 | return len(self.classes) 47 | 48 | def get_labels(self, indexes): 49 | return [self.classes[i] for i in indexes] 50 | -------------------------------------------------------------------------------- /cv/python-sdk-v1/data-science/src/model/net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torchvision.models as models 7 | import torch.optim as optim 8 | 9 | 10 | def load_model(path=None, num_classes=2, mode='finetuning', learning_rate=0.001, momentum=0.9): 11 | 12 | # Load existing model (NOTE: this branch returns only the model, without criterion/optimizer) 13 | if path: 14 | print('Loading existing model from path...') 15 | model_data = torch.load(path) 16 | model = models.resnet18(pretrained=False) 17 | model.fc = nn.Linear(model.fc.in_features, model_data['fc.weight'].shape[0]) 18 | model.load_state_dict(model_data) 19 | return model 20 | 21 | # Initialize new model (this branch returns a (model, criterion, optimizer) tuple) 22 | assert mode in ['finetuning', 'feature-extraction'] 23 | 24 | model = models.resnet18(pretrained=True) 25 | if mode == 'feature-extraction': # Freeze layers 26 | for param in model.parameters(): 27 | param.requires_grad = False 28 | 29 | model.fc = nn.Linear(model.fc.in_features, num_classes) 30 | 31 | criterion = nn.CrossEntropyLoss() 32 | 33 | params_optim = model.parameters() if mode == 'finetuning' else model.fc.parameters() if mode == 'feature-extraction' else None 34 | optimizer = optim.SGD(params_optim, lr=learning_rate, momentum=momentum) 35 | 36 | return model, criterion, optimizer 37 | -------------------------------------------------------------------------------- /cv/python-sdk-v1/data-science/src/prep.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License.
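# Summary: reads image_labels.csv from the raw data path, splits it roughly 70/30 into labels_train.csv / labels_test.csv, and logs the dataset sizes to MLflow. Note that np.random.rand is not seeded here, so the split differs on every run; call np.random.seed(<value>) beforehand if a reproducible split is needed.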
3 | 4 | import os 5 | import argparse 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import mlflow 10 | 11 | 12 | def main(raw_data_path, prepared_data_path): 13 | 14 | print(f'Raw data path: {raw_data_path}') 15 | print(f'Output data path: {prepared_data_path}') 16 | 17 | # Read data 18 | 19 | labels_data = pd.read_csv(os.path.join(raw_data_path, 'image_labels.csv')) 20 | 21 | mlflow.log_metric('total_labels', len(labels_data)) 22 | 23 | # Split data into train and test datasets 24 | 25 | random_data = np.random.rand(len(labels_data)) 26 | labels_train = labels_data[random_data < 0.7] 27 | labels_test = labels_data[random_data >= 0.7] 28 | 29 | print(labels_train) 30 | 31 | mlflow.log_metric('train_size', labels_train.shape[0]) 32 | mlflow.log_metric('test_size', labels_test.shape[0]) 33 | 34 | labels_train.to_csv(os.path.join(prepared_data_path, 'labels_train.csv'), index=False) 35 | labels_test.to_csv(os.path.join(prepared_data_path, 'labels_test.csv'), index=False) 36 | 37 | print('Finished.') 38 | 39 | 40 | def parse_args(args_list=None): 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument("--dogs-labels", type=str, required=True, help="Path to labels") 43 | parser.add_argument("--prepared_data_path", type=str, required=True, help="Path for prepared data") 44 | 45 | args_parsed, unknown = parser.parse_known_args(args_list) 46 | if unknown: 47 | print(f"Unrecognized arguments. These won't be used: {unknown}") 48 | 49 | return args_parsed 50 | 51 | 52 | if __name__ == "__main__": 53 | args = parse_args() 54 | 55 | main( 56 | raw_data_path=args.dogs_labels, 57 | prepared_data_path=args.prepared_data_path 58 | ) 59 | -------------------------------------------------------------------------------- /cv/python-sdk-v1/data-science/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/cv/python-sdk-v1/data-science/tests/.gitkeep -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: azureml-cli-v2 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python==3.8 7 | - yapf 8 | - pylint 9 | - pip 10 | - pip: 11 | - mlflow 12 | - cloudpickle==1.6.0 13 | - scikit-learn==0.24.2 14 | - flask==1.1.2 15 | - applicationinsights 16 | - pandas 17 | - azureml-core 18 | - azureml-dataset-runtime[fuse] 19 | - opencensus-ext-azure 20 | -------------------------------------------------------------------------------- /infrastructure/bicep/bicepconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "analyzers": { 3 | "core": { 4 | "enabled": true, 5 | "verbose": true, 6 | "rules": { 7 | "no-hardcoded-env-urls": { 8 | "level": "error" 9 | }, 10 | "no-unused-params": { 11 | "level": "error" 12 | }, 13 | "no-unused-vars": { 14 | "level": "error" 15 | }, 16 | "prefer-interpolation": { 17 | "level": "error" 18 | }, 19 | "secure-parameter-default": { 20 | "level": "error" 21 | }, 22 | "simplify-interpolation": { 23 | "level": "error" 24 | } 25 | } 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /infrastructure/bicep/main.bicep: -------------------------------------------------------------------------------- 1 | targetScope = 'subscription' 2 | 3 | param location string = 'westus2' 4 | param prefix 
string 5 | param postfix string 6 | param env string 7 | 8 | param tags object = { 9 | Owner: 'mlops-v2' 10 | Project: 'mlops-v2' 11 | Environment: env 12 | Toolkit: 'bicep' 13 | Name: prefix 14 | } 15 | 16 | var baseName = '${prefix}-${postfix}${env}' 17 | var resourceGroupName = 'rg-${baseName}' 18 | 19 | resource rg 'Microsoft.Resources/resourceGroups@2020-06-01' = { 20 | name: resourceGroupName 21 | location: location 22 | 23 | tags: tags 24 | } 25 | 26 | // Storage Account 27 | module st './modules/storage_account.bicep' = { 28 | name: 'st' 29 | scope: resourceGroup(rg.name) 30 | params: { 31 | baseName: '${uniqueString(rg.id)}${env}' 32 | location: location 33 | tags: tags 34 | } 35 | } 36 | 37 | // Key Vault 38 | module kv './modules/key_vault.bicep' = { 39 | name: 'kv' 40 | scope: resourceGroup(rg.name) 41 | params: { 42 | baseName: baseName 43 | location: location 44 | tags: tags 45 | } 46 | } 47 | 48 | // App Insights 49 | module appi './modules/application_insights.bicep' = { 50 | name: 'appi' 51 | scope: resourceGroup(rg.name) 52 | params: { 53 | baseName: baseName 54 | location: location 55 | tags: tags 56 | } 57 | } 58 | 59 | // Container Registry 60 | module cr './modules/container_registry.bicep' = { 61 | name: 'cr' 62 | scope: resourceGroup(rg.name) 63 | params: { 64 | baseName: '${uniqueString(rg.id)}${env}' 65 | location: location 66 | tags: tags 67 | } 68 | } 69 | 70 | // AML workspace 71 | module mlw './modules/aml_workspace.bicep' = { 72 | name: 'mlw' 73 | scope: resourceGroup(rg.name) 74 | params: { 75 | baseName: baseName 76 | location: location 77 | stoacctid: st.outputs.stoacctOut 78 | kvid: kv.outputs.kvOut 79 | appinsightid: appi.outputs.appinsightOut 80 | crid: cr.outputs.crOut 81 | tags: tags 82 | } 83 | } 84 | 85 | // AML compute cluster 86 | module mlwcc './modules/aml_computecluster.bicep' = { 87 | name: 'mlwcc' 88 | scope: resourceGroup(rg.name) 89 | params: { 90 | location: location 91 | workspaceName: mlw.outputs.amlsName 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /infrastructure/bicep/modules/aml_computecluster.bicep: -------------------------------------------------------------------------------- 1 | param location string 2 | param computeClusterName string = 'cpu-cluster' 3 | param workspaceName string 4 | 5 | resource amlci 'Microsoft.MachineLearningServices/workspaces/computes@2020-09-01-preview' = { 6 | name: '${workspaceName}/${computeClusterName}' 7 | location: location 8 | properties: { 9 | computeType: 'AmlCompute' 10 | properties: { 11 | vmSize: 'Standard_DS3_v2' 12 | subnet: json('null') 13 | osType: 'Linux' 14 | scaleSettings: { 15 | maxNodeCount: 4 16 | minNodeCount: 0 17 | } 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /infrastructure/bicep/modules/aml_workspace.bicep: -------------------------------------------------------------------------------- 1 | param baseName string 2 | param location string 3 | param stoacctid string 4 | param kvid string 5 | param appinsightid string 6 | param crid string 7 | param tags object 8 | 9 | // AML workspace 10 | resource amls 'Microsoft.MachineLearningServices/workspaces@2020-09-01-preview' = { 11 | name: 'mlw-${baseName}' 12 | location: location 13 | identity: { 14 | type: 'SystemAssigned' 15 | } 16 | sku: { 17 | tier: 'basic' 18 | name: 'basic' 19 | } 20 | properties: { 21 | storageAccount: stoacctid 22 | keyVault: kvid 23 | applicationInsights: appinsightid 24 | containerRegistry: 
crid 25 | encryption: { 26 | status: 'Disabled' 27 | keyVaultProperties: { 28 | keyIdentifier: '' 29 | keyVaultArmId: '' 30 | } 31 | } 32 | } 33 | 34 | tags: tags 35 | } 36 | 37 | output amlsName string = amls.name 38 | -------------------------------------------------------------------------------- /infrastructure/bicep/modules/application_insights.bicep: -------------------------------------------------------------------------------- 1 | param baseName string 2 | param location string 3 | param tags object 4 | 5 | // App Insights 6 | resource appinsight 'Microsoft.Insights/components@2020-02-02-preview' = { 7 | name: 'appi-${baseName}' 8 | location: location 9 | kind: 'web' 10 | properties: { 11 | Application_Type: 'web' 12 | } 13 | 14 | tags: tags 15 | } 16 | 17 | output appinsightOut string = appinsight.id 18 | -------------------------------------------------------------------------------- /infrastructure/bicep/modules/container_registry.bicep: -------------------------------------------------------------------------------- 1 | param baseName string 2 | param location string 3 | param tags object 4 | 5 | resource cr 'Microsoft.ContainerRegistry/registries@2020-11-01-preview' = { 6 | name: 'cr${baseName}' 7 | location: location 8 | sku: { 9 | name: 'Standard' 10 | } 11 | 12 | properties: { 13 | adminUserEnabled: true 14 | } 15 | 16 | tags: tags 17 | } 18 | 19 | output crOut string = cr.id 20 | -------------------------------------------------------------------------------- /infrastructure/bicep/modules/key_vault.bicep: -------------------------------------------------------------------------------- 1 | param baseName string 2 | param location string 3 | param tags object 4 | 5 | // Key Vault 6 | resource kv 'Microsoft.KeyVault/vaults@2019-09-01' = { 7 | name: 'kv-${baseName}' 8 | location: location 9 | properties: { 10 | tenantId: subscription().tenantId 11 | sku: { 12 | name: 'standard' 13 | family: 'A' 14 | } 15 | accessPolicies: [] 16 | } 17 | 18 | tags: tags 19 | } 20 | 21 | output kvOut string = kv.id 22 | -------------------------------------------------------------------------------- /infrastructure/bicep/modules/storage_account.bicep: -------------------------------------------------------------------------------- 1 | param baseName string 2 | param location string 3 | param tags object 4 | 5 | // Storage Account 6 | resource stoacct 'Microsoft.Storage/storageAccounts@2019-04-01' = { 7 | name: 'st${baseName}' 8 | location: location 9 | sku: { 10 | name: 'Standard_LRS' 11 | } 12 | kind: 'StorageV2' 13 | properties: { 14 | encryption: { 15 | services: { 16 | blob: { 17 | enabled: true 18 | } 19 | file: { 20 | enabled: true 21 | } 22 | } 23 | keySource: 'Microsoft.Storage' 24 | } 25 | supportsHttpsTrafficOnly: true 26 | } 27 | 28 | tags: tags 29 | } 30 | 31 | output stoacctOut string = stoacct.id 32 | -------------------------------------------------------------------------------- /infrastructure/bicep/pipelines/bicep-ado-deploy-infra.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
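# Stages: (1) lint the Bicep code with 'az bicep build', (2) preflight-check it with 'az deployment sub validate', (3) deploy at subscription scope with 'az deployment sub create'. The PRD vs DEV config template is selected by branch name ('main' maps to config-infra-prod.yml, anything else to config-infra-dev.yml).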
3 | 4 | name: bicep-ado-deploy-infra 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | 14 | trigger: none 15 | 16 | pool: 17 | vmImage: $(ap_vm_image) 18 | 19 | stages: 20 | - stage: Lint 21 | displayName: Lint and Preflight check 22 | jobs: 23 | - job: LintBicep 24 | displayName: Lint Bicep Code 25 | steps: 26 | - checkout: self 27 | - script: | 28 | az bicep build --file ./infrastructure/main.bicep 29 | name: LintBicepCode 30 | displayName: Run Bicep Linter 31 | 32 | - stage: PreflightValidation 33 | jobs: 34 | - job: ValidateBicepCode 35 | displayName: Validate Bicep Code 36 | steps: 37 | - task: AzureCLI@2 38 | name: RunPreflightValidation 39 | displayName: Run Preflight Validation 40 | inputs: 41 | azureSubscription: $(ado_service_connection_rg) 42 | scriptType: "bash" 43 | scriptLocation: "inlineScript" 44 | inlineScript: | 45 | az deployment sub validate \ 46 | --name $(Build.DefinitionName) \ 47 | --template-file ./infrastructure/main.bicep \ 48 | --location $(location) \ 49 | --parameters location=$(location) prefix=$(namespace) postfix=$(postfix) env=$(environment) 50 | 51 | - stage: CheckOutBicepAndDeploy 52 | displayName: Deploy AML Workspace 53 | jobs: 54 | - deployment: DevDeployBicep 55 | displayName: Deploy Bicep 56 | pool: 57 | vmImage: $(ap_vm_image) 58 | environment: $(environment) 59 | strategy: 60 | runOnce: 61 | deploy: 62 | steps: 63 | - checkout: self 64 | - task: AzureCLI@2 65 | displayName: Running ${{ variables.environment }} Deployment 66 | inputs: 67 | azureSubscription: $(ado_service_connection_rg) 68 | scriptType: bash 69 | scriptLocation: inlineScript 70 | inlineScript: | 71 | az --version 72 | echo "deploying bicep..."
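# main.bicep targets the subscription scope and creates the resource group itself, hence 'az deployment sub create' below rather than a resource-group-scoped 'az deployment group create'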
73 | az deployment sub create \ 74 | --name $(Build.DefinitionName) \ 75 | --location $(location) \ 76 | --template-file ./infrastructure/main.bicep \ 77 | --parameters location=$(location) prefix=$(namespace) postfix=$(postfix) env=$(environment) 78 | -------------------------------------------------------------------------------- /infrastructure/terraform/aml_deploy.tf: -------------------------------------------------------------------------------- 1 | # Resource group 2 | 3 | module "resource_group" { 4 | source = "./modules/resource-group" 5 | 6 | location = var.location 7 | 8 | prefix = var.prefix 9 | postfix = var.postfix 10 | env = var.environment 11 | 12 | tags = local.tags 13 | } 14 | 15 | # Azure Machine Learning workspace 16 | 17 | module "aml_workspace" { 18 | source = "./modules/aml-workspace" 19 | 20 | rg_name = module.resource_group.name 21 | location = module.resource_group.location 22 | 23 | prefix = var.prefix 24 | postfix = var.postfix 25 | env = var.environment 26 | 27 | storage_account_id = module.storage_account_aml.id 28 | key_vault_id = module.key_vault.id 29 | application_insights_id = module.application_insights.id 30 | container_registry_id = module.container_registry.id 31 | 32 | enable_aml_computecluster = var.enable_aml_computecluster 33 | storage_account_name = module.storage_account_aml.name 34 | 35 | tags = local.tags 36 | } 37 | 38 | # Storage account 39 | 40 | module "storage_account_aml" { 41 | source = "./modules/storage-account" 42 | 43 | rg_name = module.resource_group.name 44 | location = module.resource_group.location 45 | 46 | prefix = var.prefix 47 | postfix = var.postfix 48 | env = var.environment 49 | 50 | hns_enabled = false 51 | firewall_bypass = ["AzureServices"] 52 | firewall_virtual_network_subnet_ids = [] 53 | 54 | tags = local.tags 55 | } 56 | 57 | # Key vault 58 | 59 | module "key_vault" { 60 | source = "./modules/key-vault" 61 | 62 | rg_name = module.resource_group.name 63 | location = module.resource_group.location 64 | 65 | prefix = var.prefix 66 | postfix = var.postfix 67 | env = var.environment 68 | 69 | tags = local.tags 70 | } 71 | 72 | # Application insights 73 | 74 | module "application_insights" { 75 | source = "./modules/application-insights" 76 | 77 | rg_name = module.resource_group.name 78 | location = module.resource_group.location 79 | 80 | prefix = var.prefix 81 | postfix = var.postfix 82 | env = var.environment 83 | 84 | tags = local.tags 85 | } 86 | 87 | # Container registry 88 | 89 | module "container_registry" { 90 | source = "./modules/container-registry" 91 | 92 | rg_name = module.resource_group.name 93 | location = module.resource_group.location 94 | 95 | prefix = var.prefix 96 | postfix = var.postfix 97 | env = var.environment 98 | 99 | tags = local.tags 100 | } 101 | 102 | module "data_explorer" { 103 | source = "./modules/data-explorer" 104 | 105 | rg_name = module.resource_group.name 106 | location = module.resource_group.location 107 | 108 | prefix = var.prefix 109 | postfix = var.postfix 110 | env = var.environment 111 | key_vault_id = module.key_vault.id 112 | enable_monitoring = var.enable_monitoring 113 | 114 | client_secret = var.client_secret 115 | 116 | tags = local.tags 117 | } 118 | -------------------------------------------------------------------------------- /infrastructure/terraform/devops-pipelines/tf-ado-deploy-infra.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 
2 | # Licensed under the MIT License. 3 | 4 | name: tf-ado-deploy-infra 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | 14 | trigger: none 15 | 16 | pool: 17 | vmImage: ubuntu-20.04 18 | 19 | resources: 20 | repositories: 21 | - repository: mlops-templates # Template Repo 22 | name: mlops-templates 23 | type: git 24 | ref: main 25 | 26 | stages: 27 | - stage: CreateStorageAccountForTerraformState 28 | displayName: Create Storage for Terraform 29 | jobs: 30 | - job: CreateStorageForTerraform 31 | displayName: Create Storage for Terraform 32 | steps: 33 | - checkout: self 34 | path: s/ 35 | - task: Bash@3 36 | displayName: "Create checkout repository folder(s)" 37 | inputs: 38 | targetType: "inline" 39 | script: | 40 | set -e 41 | mkdir "$(Build.Repository.Name)" 42 | mkdir "mlops-templates" 43 | - checkout: mlops-templates 44 | path: s/templates/ 45 | - template: templates/infra/create-resource-group.yml@mlops-templates 46 | - template: templates/infra/create-storage-account.yml@mlops-templates 47 | - template: templates/infra/create-storage-container.yml@mlops-templates 48 | - stage: DeployAzureMachineLearningRG 49 | displayName: Deploy AML Workspace 50 | jobs: 51 | - job: DeployAMLWorkspace 52 | displayName: Deploy Terraform 53 | steps: 54 | - checkout: self 55 | path: s/ 56 | - task: Bash@3 57 | displayName: "Create checkout repository folder(s)" 58 | inputs: 59 | targetType: "inline" 60 | script: | 61 | set -e 62 | mkdir "$(Build.Repository.Name)" 63 | mkdir "mlops-templates" 64 | - checkout: mlops-templates 65 | path: s/templates/ 66 | - template: templates/infra/create-sp-variables.yml@mlops-templates 67 | - template: templates/infra/install-terraform.yml@mlops-templates 68 | - template: templates/infra/run-terraform-init.yml@mlops-templates 69 | - template: templates/infra/run-terraform-validate.yml@mlops-templates 70 | - template: templates/infra/run-terraform-plan.yml@mlops-templates 71 | - template: templates/infra/run-terraform-apply.yml@mlops-templates 72 | -------------------------------------------------------------------------------- /infrastructure/terraform/github-actions/tf-gha-deploy-infra.yml: -------------------------------------------------------------------------------- 1 | name: tf-gha-deploy-infra.yml 2 | 3 | on: 4 | #push: 5 | workflow_dispatch: 6 | env: 7 | config_env: "none" 8 | jobs: 9 | set-env-branch: 10 | runs-on: ubuntu-latest 11 | outputs: 12 | config-file: ${{ steps.set-output-defaults.outputs.config-file }} 13 | steps: 14 | - id: set-prod-branch 15 | name: set-prod-branch 16 | if: ${{ github.ref == 'refs/heads/main'}} 17 | run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV; 18 | - id: set-dev-branch 19 | name: setdevbranch 20 | if: ${{ github.ref != 'refs/heads/main'}} 21 | run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV; 22 | - id: set-output-defaults 23 | name: set-output-defaults 24 | run: | 25 | echo "config-file=$config_env" >> $GITHUB_OUTPUT; 26 | get-config: 27 | needs: set-env-branch 28 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main 29 | with: 30 | file_name: ${{ needs.set-env-branch.outputs.config-file}} 31 | test-terraform-state-deployment: 32 | needs: [get-config, set-env-branch] 33 | uses: 
Azure/mlops-templates/.github/workflows/tf-gha-install-terraform.yml@main 34 | with: 35 | TFAction: "apply" 36 | dply_environment: ${{ needs.set-env-branch.outputs.config-file }} 37 | location: ${{ needs.get-config.outputs.location }} 38 | namespace: ${{ needs.get-config.outputs.namespace }} 39 | postfix: ${{ needs.get-config.outputs.postfix }} 40 | environment: ${{ needs.get-config.outputs.environment }} 41 | enable_aml_computecluster: ${{ needs.get-config.outputs.enable_aml_computecluster == true }} ## TODO review the evaluation of boolean 42 | enable_monitoring: ${{ needs.get-config.outputs.enable_monitoring == true }} ## TODO review the evaluation of boolean 43 | terraform_version: ${{ needs.get-config.outputs.terraform_version }} 44 | terraform_workingdir: ${{ needs.get-config.outputs.terraform_workingdir }} 45 | terraform_st_location: ${{ needs.get-config.outputs.terraform_st_location }} 46 | terraform_st_storage_account: ${{ needs.get-config.outputs.terraform_st_storage_account }} 47 | terraform_st_resource_group: ${{ needs.get-config.outputs.terraform_st_resource_group }} 48 | terraform_st_container_name: ${{ needs.get-config.outputs.terraform_st_container_name }} 49 | terraform_st_key: ${{ needs.get-config.outputs.terraform_st_key }} 50 | terraform_plan_location: ${{ needs.get-config.outputs.location }} 51 | terraform_plan_vnet: "TBD" # TBD 52 | secrets: 53 | azure_creds: ${{ secrets.AZURE_CREDENTIALS }} 54 | clientId: ${{ secrets.ARM_CLIENT_ID }} 55 | clientSecret: ${{ secrets.ARM_CLIENT_SECRET }} 56 | subscriptionId: ${{ secrets.ARM_SUBSCRIPTION_ID }} 57 | tenantId: ${{ secrets.ARM_TENANT_ID }} 58 | deploy-azureml-resources: 59 | runs-on: ubuntu-latest 60 | steps: 61 | - id: deploy-aml-workspace 62 | name: deploy-aml-workspace 63 | run: echo "OK" 64 | -------------------------------------------------------------------------------- /infrastructure/terraform/locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | tags = { 3 | Owner = "mlops-v2" 4 | Project = "mlops-v2" 5 | Environment = "${var.environment}" 6 | Toolkit = "terraform" 7 | Name = "${var.prefix}" 8 | } 9 | } -------------------------------------------------------------------------------- /infrastructure/terraform/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | backend "azurerm" {} 3 | required_providers { 4 | azurerm = { 5 | version = "= 2.99.0" 6 | } 7 | } 8 | } 9 | 10 | provider "azurerm" { 11 | features {} 12 | } 13 | 14 | data "azurerm_client_config" "current" {} 15 | 16 | data "http" "ip" { 17 | url = "https://ifconfig.me" 18 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/aml-workspace/main.tf: -------------------------------------------------------------------------------- 1 | resource "azurerm_machine_learning_workspace" "mlw" { 2 | name = "mlw-${var.prefix}-${var.postfix}${var.env}" 3 | location = var.location 4 | resource_group_name = var.rg_name 5 | application_insights_id = var.application_insights_id 6 | key_vault_id = var.key_vault_id 7 | storage_account_id = var.storage_account_id 8 | container_registry_id = var.container_registry_id 9 | 10 | sku_name = "Basic" 11 | 12 | identity { 13 | type = "SystemAssigned" 14 | } 15 | 16 | tags = var.tags 17 | } 18 | 19 | # Compute cluster 20 | 21 | resource "azurerm_machine_learning_compute_cluster" "adl_aml_ws_compute_cluster" { 22 | name = "cpu-cluster" 23 | location = 
var.location 24 | vm_priority = "LowPriority" 25 | vm_size = "Standard_DS3_v2" 26 | machine_learning_workspace_id = azurerm_machine_learning_workspace.mlw.id 27 | count = var.enable_aml_computecluster ? 1 : 0 28 | 29 | scale_settings { 30 | min_node_count = 0 31 | max_node_count = 4 32 | scale_down_nodes_after_idle_duration = "PT120S" # 120 seconds 33 | } 34 | } 35 | 36 | # # Datastore 37 | 38 | # resource "azurerm_resource_group_template_deployment" "arm_aml_create_datastore" { 39 | # name = "arm_aml_create_datastore" 40 | # resource_group_name = var.rg_name 41 | # deployment_mode = "Incremental" 42 | # parameters_content = jsonencode({ 43 | # "WorkspaceName" = { 44 | # value = azurerm_machine_learning_workspace.mlw.name 45 | # }, 46 | # "StorageAccountName" = { 47 | # value = var.storage_account_name 48 | # } 49 | # }) 50 | 51 | # depends_on = [time_sleep.wait_30_seconds] 52 | 53 | # template_content = <<TEMPLATE 54 | # { 55 | # "$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 56 | # "contentVersion": "1.0.0.0", 57 | # "parameters": { 58 | # "WorkspaceName": { 59 | # "type": "String" 60 | # }, 61 | # "StorageAccountName": { 62 | # "type": "String" 63 | # } 64 | # }, 65 | # "resources": [ 66 | # { 67 | # "type": "Microsoft.MachineLearningServices/workspaces/datastores", 68 | # "apiVersion": "2021-03-01-preview", 69 | # "name": "[concat(parameters('WorkspaceName'), '/default')]", 70 | # "dependsOn": [], 71 | # "properties": { 72 | # "contents": { 73 | # "accountName": "[parameters('StorageAccountName')]", 74 | # "containerName": "default", 75 | # "contentsType": "AzureBlob", 76 | # "credentials": { 77 | # "credentialsType": "None" 78 | # }, 79 | # "endpoint": "core.windows.net", 80 | # "protocol": "https" 81 | # }, 82 | # "description": "Default datastore for mlops-tabular", 83 | # "isDefault": false, 84 | # "properties": { 85 | # "ServiceDataAccessAuthIdentity": "None" 86 | # }, 87 | # "tags": {} 88 | # } 89 | # } 90 | # ] 91 | # } 92 | # TEMPLATE 93 | # } 94 | 95 | # resource "time_sleep" "wait_30_seconds" { 96 | 97 | # depends_on = [ 98 | # azurerm_machine_learning_workspace.mlw 99 | # ] 100 | 101 | # create_duration = "30s" 102 | # } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/aml-workspace/outputs.tf: -------------------------------------------------------------------------------- 1 | output "name" { 2 | value = azurerm_machine_learning_workspace.mlw.name 3 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/aml-workspace/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rg_name" { 2 | type = string 3 | description = "Resource group name" 4 | } 5 | 6 | variable "location" { 7 | type = string 8 | description = "Location of the resource group" 9 | } 10 | 11 | variable "tags" { 12 | type = map(string) 13 | default = {} 14 | description = "A mapping of tags which should be assigned to the deployed resource" 15 | } 16 | 17 | variable "prefix" { 18 | type = string 19 | description = "Prefix for the module name" 20 | } 21 | 22 | variable "postfix" { 23 | type = string 24 | description = "Postfix for the module name" 25 | } 26 | 27 | variable "env" { 28 | type = string 29 | description = "Environment prefix" 30 | } 31 | 32 | variable "storage_account_id" { 33 | type = string 34 | description = "The ID of the Storage Account linked to AML workspace" 35 | } 
36 | 37 | variable "key_vault_id" { 38 | type = string 39 | description = "The ID of the Key Vault linked to AML workspace" 40 | } 41 | 42 | variable "application_insights_id" { 43 | type = string 44 | description = "The ID of the Application Insights linked to AML workspace" 45 | } 46 | 47 | variable "container_registry_id" { 48 | type = string 49 | description = "The ID of the Container Registry linked to AML workspace" 50 | } 51 | 52 | variable "enable_aml_computecluster" { 53 | description = "Variable to enable or disable AML compute cluster" 54 | default = false 55 | } 56 | 57 | variable "storage_account_name" { 58 | type = string 59 | description = "The Name of the Storage Account linked to AML workspace" 60 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/application-insights/main.tf: -------------------------------------------------------------------------------- 1 | resource "azurerm_application_insights" "appi" { 2 | name = "appi-${var.prefix}-${var.postfix}${var.env}" 3 | location = var.location 4 | resource_group_name = var.rg_name 5 | application_type = "web" 6 | 7 | tags = var.tags 8 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/application-insights/outputs.tf: -------------------------------------------------------------------------------- 1 | output "id" { 2 | value = azurerm_application_insights.appi.id 3 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/application-insights/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rg_name" { 2 | type = string 3 | description = "Resource group name" 4 | } 5 | 6 | variable "location" { 7 | type = string 8 | description = "Location of the resource group" 9 | } 10 | 11 | variable "tags" { 12 | type = map(string) 13 | default = {} 14 | description = "A mapping of tags which should be assigned to the deployed resource" 15 | } 16 | 17 | variable "prefix" { 18 | type = string 19 | description = "Prefix for the module name" 20 | } 21 | 22 | variable "postfix" { 23 | type = string 24 | description = "Postfix for the module name" 25 | } 26 | 27 | variable "env" { 28 | type = string 29 | description = "Environment prefix" 30 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/container-registry/main.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | safe_prefix = replace(var.prefix, "-", "") 3 | safe_postfix = replace(var.postfix, "-", "") 4 | } 5 | 6 | resource "azurerm_container_registry" "cr" { 7 | name = "cr${local.safe_prefix}${local.safe_postfix}${var.env}" 8 | resource_group_name = var.rg_name 9 | location = var.location 10 | sku = "Standard" 11 | admin_enabled = true 12 | 13 | tags = var.tags 14 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/container-registry/outputs.tf: -------------------------------------------------------------------------------- 1 | output "id" { 2 | value = azurerm_container_registry.cr.id 3 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/container-registry/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rg_name" { 2 | type = 
string 3 | description = "Resource group name" 4 | } 5 | 6 | variable "location" { 7 | type = string 8 | description = "Location of the resource group" 9 | } 10 | 11 | variable "tags" { 12 | type = map(string) 13 | default = {} 14 | description = "A mapping of tags which should be assigned to the deployed resource" 15 | } 16 | 17 | variable "prefix" { 18 | type = string 19 | description = "Prefix for the module name" 20 | } 21 | 22 | variable "postfix" { 23 | type = string 24 | description = "Postfix for the module name" 25 | } 26 | 27 | variable "env" { 28 | type = string 29 | description = "Environment prefix" 30 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/data-explorer/main.tf: -------------------------------------------------------------------------------- 1 | data "azurerm_client_config" "current" {} 2 | 3 | resource "azurerm_kusto_cluster" "cluster" { 4 | name = "adx${var.prefix}${var.postfix}${var.env}" 5 | location = var.location 6 | resource_group_name = var.rg_name 7 | streaming_ingestion_enabled = true 8 | language_extensions = ["PYTHON"] 9 | count = var.enable_monitoring ? 1 : 0 10 | 11 | sku { 12 | name = "Standard_D11_v2" 13 | capacity = 2 14 | } 15 | tags = var.tags 16 | } 17 | 18 | resource "azurerm_kusto_database" "database" { 19 | name = "mlmonitoring" 20 | resource_group_name = var.rg_name 21 | location = var.location 22 | cluster_name = azurerm_kusto_cluster.cluster[0].name 23 | count = var.enable_monitoring ? 1 : 0 24 | } 25 | 26 | resource "azurerm_key_vault_secret" "SP_ID" { 27 | name = "kvmonitoringspid" 28 | value = data.azurerm_client_config.current.client_id 29 | key_vault_id = var.key_vault_id 30 | count = var.enable_monitoring ? 1 : 0 31 | } 32 | 33 | resource "azurerm_key_vault_secret" "SP_KEY" { 34 | name = "kvmonitoringspkey" 35 | value = trim(var.client_secret, "'") 36 | key_vault_id = var.key_vault_id 37 | count = var.enable_monitoring ? 1 : 0 38 | } 39 | 40 | resource "azurerm_key_vault_secret" "SP_TENANT_ID" { 41 | name = "kvmonitoringadxtenantid" 42 | value = data.azurerm_client_config.current.tenant_id 43 | key_vault_id = var.key_vault_id 44 | count = var.enable_monitoring ? 1 : 0 45 | } 46 | 47 | resource "azurerm_key_vault_secret" "ADX_URI" { 48 | name = "kvmonitoringadxuri" 49 | value = azurerm_kusto_cluster.cluster[0].uri 50 | key_vault_id = var.key_vault_id 51 | count = var.enable_monitoring ? 1 : 0 52 | } 53 | 54 | resource "azurerm_key_vault_secret" "ADX_DB" { 55 | name = "kvmonitoringadxdb" 56 | value = azurerm_kusto_database.database[0].name 57 | key_vault_id = var.key_vault_id 58 | count = var.enable_monitoring ? 
1 : 0 59 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/data-explorer/outputs.tf: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /infrastructure/terraform/modules/data-explorer/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rg_name" { 2 | type = string 3 | description = "Resource group name" 4 | } 5 | 6 | variable "location" { 7 | type = string 8 | description = "Location of the resource group" 9 | } 10 | 11 | variable "tags" { 12 | type = map(string) 13 | default = {} 14 | description = "A mapping of tags which should be assigned to the deployed resource" 15 | } 16 | 17 | variable "prefix" { 18 | type = string 19 | description = "Prefix for the module name" 20 | } 21 | 22 | variable "postfix" { 23 | type = string 24 | description = "Postfix for the module name" 25 | } 26 | 27 | variable "env" { 28 | type = string 29 | description = "Environment prefix" 30 | } 31 | 32 | variable "key_vault_id" { 33 | type = string 34 | description = "The ID of the Key Vault linked to AML workspace" 35 | } 36 | 37 | variable "enable_monitoring" { 38 | description = "Variable to enable or disable Monitoring" 39 | default = false 40 | } 41 | 42 | variable "client_secret" { 43 | description = "Service Principal client secret" 44 | default = false 45 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/key-vault/main.tf: -------------------------------------------------------------------------------- 1 | data "azurerm_client_config" "current" {} 2 | 3 | resource "azurerm_key_vault" "kv" { 4 | name = "kv-${var.prefix}-${var.postfix}${var.env}" 5 | location = var.location 6 | resource_group_name = var.rg_name 7 | tenant_id = data.azurerm_client_config.current.tenant_id 8 | sku_name = "standard" 9 | 10 | tags = var.tags 11 | access_policy { 12 | tenant_id = data.azurerm_client_config.current.tenant_id 13 | object_id = data.azurerm_client_config.current.object_id 14 | 15 | key_permissions = [ 16 | "Create", 17 | "Get", 18 | ] 19 | 20 | secret_permissions = [ 21 | "Set", 22 | "Get", 23 | "Delete", 24 | "Purge", 25 | "Recover" 26 | ] 27 | } 28 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/key-vault/outputs.tf: -------------------------------------------------------------------------------- 1 | output "id" { 2 | value = azurerm_key_vault.kv.id 3 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/key-vault/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rg_name" { 2 | type = string 3 | description = "Resource group name" 4 | } 5 | 6 | variable "location" { 7 | type = string 8 | description = "Location of the resource group" 9 | } 10 | 11 | variable "tags" { 12 | type = map(string) 13 | default = {} 14 | description = "A mapping of tags which should be assigned to the deployed resource" 15 | } 16 | 17 | variable "prefix" { 18 | type = string 19 | description = "Prefix for the module name" 20 | } 21 | 22 | variable "postfix" { 23 | type = string 24 | description = "Postfix for the module name" 25 | } 26 | 27 | variable "env" { 28 | type = string 29 | description = "Environment prefix" 30 | }
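For reference, the monitoring secrets written to Key Vault by the data-explorer module above can be read back at runtime with the Azure SDK for Python. A minimal sketch only, assuming the azure-identity and azure-keyvault-secrets packages and a placeholder vault URL (the real URL follows the key-vault module's kv-<prefix>-<postfix><env> naming):

from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient

# Placeholder URL; substitute the vault deployed by modules/key-vault.
VAULT_URL = "https://kv-myprefix-mypostfixdev.vault.azure.net"

client = SecretClient(vault_url=VAULT_URL, credential=DefaultAzureCredential())

# Secret names exactly as created by modules/data-explorer/main.tf
adx_uri = client.get_secret("kvmonitoringadxuri").value
adx_db = client.get_secret("kvmonitoringadxdb").value
sp_id = client.get_secret("kvmonitoringspid").value
tenant_id = client.get_secret("kvmonitoringadxtenantid").value

print(f"ADX endpoint {adx_uri}, database {adx_db}")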
-------------------------------------------------------------------------------- /infrastructure/terraform/modules/resource-group/main.tf: -------------------------------------------------------------------------------- 1 | resource "azurerm_resource_group" "adl_rg" { 2 | name = "rg-${var.prefix}-${var.postfix}${var.env}" 3 | location = var.location 4 | tags = var.tags 5 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/resource-group/outputs.tf: -------------------------------------------------------------------------------- 1 | output "name" { 2 | value = azurerm_resource_group.adl_rg.name 3 | } 4 | 5 | output "location" { 6 | value = azurerm_resource_group.adl_rg.location 7 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/resource-group/variables.tf: -------------------------------------------------------------------------------- 1 | variable "location" { 2 | type = string 3 | default = "North Europe" 4 | description = "Location of the Resource Group" 5 | } 6 | 7 | variable "tags" { 8 | type = map(string) 9 | default = {} 10 | description = "A mapping of tags which should be assigned to the Resource Group" 11 | } 12 | 13 | variable "prefix" { 14 | type = string 15 | description = "Prefix for the module name" 16 | } 17 | 18 | variable "postfix" { 19 | type = string 20 | description = "Postfix for the module name" 21 | } 22 | 23 | variable "env" { 24 | type = string 25 | description = "Environment prefix" 26 | } -------------------------------------------------------------------------------- /infrastructure/terraform/modules/storage-account/main.tf: -------------------------------------------------------------------------------- 1 | data "azurerm_client_config" "current" {} 2 | 3 | data "http" "ip" { 4 | url = "https://ifconfig.me" 5 | } 6 | 7 | locals { 8 | safe_prefix = replace(var.prefix, "-", "") 9 | safe_postfix = replace(var.postfix, "-", "") 10 | } 11 | 12 | resource "azurerm_storage_account" "st" { 13 | name = "st${local.safe_prefix}${local.safe_postfix}${var.env}" 14 | resource_group_name = var.rg_name 15 | location = var.location 16 | account_tier = "Standard" 17 | account_replication_type = "LRS" 18 | account_kind = "StorageV2" 19 | is_hns_enabled = var.hns_enabled 20 | 21 | tags = var.tags 22 | 23 | } 24 | 25 | # Virtual Network & Firewall configuration 26 | 27 | resource "azurerm_storage_account_network_rules" "firewall_rules" { 28 | storage_account_id = azurerm_storage_account.st.id 29 | 30 | default_action = "Allow" 31 | ip_rules = [] # [data.http.ip.body] 32 | virtual_network_subnet_ids = var.firewall_virtual_network_subnet_ids 33 | bypass = var.firewall_bypass 34 | } 35 | -------------------------------------------------------------------------------- /infrastructure/terraform/modules/storage-account/outputs.tf: -------------------------------------------------------------------------------- 1 | output "id" { 2 | value = azurerm_storage_account.st.id 3 | } 4 | 5 | output "name" { 6 | value = azurerm_storage_account.st.name 7 | } 8 | -------------------------------------------------------------------------------- /infrastructure/terraform/modules/storage-account/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rg_name" { 2 | type = string 3 | description = "Resource group name" 4 | } 5 | 6 | variable "location" { 7 | type = string 8 | description = "Location of the resource 
group" 9 | } 10 | 11 | variable "tags" { 12 | type = map(string) 13 | default = {} 14 | description = "A mapping of tags which should be assigned to the Resource Group" 15 | } 16 | 17 | variable "prefix" { 18 | type = string 19 | description = "Prefix for the module name" 20 | } 21 | 22 | variable "postfix" { 23 | type = string 24 | description = "Postfix for the module name" 25 | } 26 | 27 | variable "env" { 28 | type = string 29 | description = "Environment prefix" 30 | } 31 | 32 | variable "hns_enabled" { 33 | type = bool 34 | description = "Hierarchical namespaces enabled/disabled" 35 | default = true 36 | } 37 | 38 | variable "firewall_virtual_network_subnet_ids" { 39 | default = [] 40 | } 41 | 42 | variable "firewall_bypass" { 43 | default = ["None"] 44 | } -------------------------------------------------------------------------------- /infrastructure/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "location" { 2 | type = string 3 | description = "Location of the resource group and modules" 4 | } 5 | 6 | variable "prefix" { 7 | type = string 8 | description = "Prefix for module names" 9 | } 10 | 11 | variable "environment" { 12 | type = string 13 | description = "Environment information" 14 | } 15 | 16 | variable "postfix" { 17 | type = string 18 | description = "Postfix for module names" 19 | } 20 | 21 | variable "enable_aml_computecluster" { 22 | description = "Variable to enable or disable AML compute cluster" 23 | } 24 | 25 | variable "enable_monitoring" { 26 | description = "Variable to enable or disable Monitoring" 27 | } 28 | 29 | variable "client_secret" { 30 | description = "Service Principal Secret" 31 | } 32 | -------------------------------------------------------------------------------- /nlp/README.md: -------------------------------------------------------------------------------- 1 | # Natural Language Processing 2 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/data-science/environments/inference/conda_env.yml: -------------------------------------------------------------------------------- 1 | name: nlp_inference_conda_env 2 | channels: 3 | - pytorch 4 | - anaconda 5 | - defaults 6 | - conda-forge 7 | dependencies: 8 | - python=3.8 9 | - pip=21.2.4 10 | - pytorch=1.10.0 11 | - torchvision=0.11.1 12 | - torchaudio=0.10.0 13 | - cudatoolkit=11.1.1 14 | - nvidia-apex=0.1.0 15 | - gxx_linux-64=8.5.0 16 | - pip: 17 | - azureml-defaults==1.39.0 18 | - azureml-mlflow==1.39.0 19 | - azureml-telemetry==1.39.0 20 | - azureml-train-core==1.39.0 21 | - mlflow==1.24.0 22 | - transformers==4.17.0 23 | - 'inference-schema[numpy-support]==1.3.0' 24 | - applicationinsights==0.11.10 25 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/data-science/environments/training/Dockerfile: -------------------------------------------------------------------------------- 1 | # check release notes https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html 2 | FROM nvcr.io/nvidia/pytorch:22.04-py3 3 | 4 | # Install dependencies missing in this container 5 | # NOTE: container already has matplotlib==3.5.1 tqdm==4.62.0 6 | COPY requirements.txt ./ 7 | RUN pip install -r requirements.txt 8 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/data-science/environments/training/requirements.txt: 
-------------------------------------------------------------------------------- 1 | # data science requirements 2 | # torchvision==0.12.0 3 | # torch==1.11.0 4 | pytorch_lightning==1.6.4 5 | transformers==4.18.0 6 | datasets==2.3.2 7 | rouge_score==0.0.4 8 | sentencepiece==0.1.96 9 | 10 | # for metrics reporting/plotting 11 | mlflow==2.3.1 12 | azureml-mlflow==1.41.0 13 | # matplotlib==3.5.2 14 | # tqdm==4.64.0 15 | psutil==5.9.0 16 | 17 | # for unit testing 18 | pytest==7.1.2 19 | 20 | # for azure ml SDK v2 21 | azure-ai-ml==1.1.0 22 | azure-common==1.1.28 23 | azure-core==1.26.1 24 | azure-identity==1.10.0 25 | azure-mgmt-core==1.3.0 26 | azure-storage-blob==12.14.1 27 | azure-storage-file-datalake==12.9.1 28 | azure-storage-file-share==12.7.0 -------------------------------------------------------------------------------- /nlp/aml-cli-v2/data-science/src/summarization/compare.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging 4 | import mlflow 5 | import json 6 | from distutils.util import strtobool 7 | 8 | def main(): 9 | """Main function of the script.""" 10 | # initialize root logger 11 | logger = logging.getLogger() 12 | logger.setLevel(logging.INFO) 13 | console_handler = logging.StreamHandler() 14 | formatter = logging.Formatter( 15 | "%(asctime)s : %(levelname)s : %(name)s : %(message)s" 16 | ) 17 | console_handler.setFormatter(formatter) 18 | logger.addHandler(console_handler) 19 | 20 | # input and output arguments 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument( 23 | "--baseline_metrics", 24 | type=str, 25 | required=True, 26 | help="path to baseline metrics folder containing all_results.json", 27 | ) 28 | parser.add_argument( 29 | "--candidate_metrics", 30 | type=str, 31 | required=True, 32 | help="path to candidate metrics folder containing all_results.json", 33 | ) 34 | parser.add_argument( 35 | "--reference_metric", 36 | type=str, 37 | default="predict_rougeLsum", 38 | help="name of reference metric for shipping flag (default: predict_rougeLsum)", 39 | ) 40 | parser.add_argument( 41 | "--force_comparison", type=strtobool, default=False, help="set to True to bypass comparison and set --deploy_flag to True" 42 | ) 43 | parser.add_argument( 44 | "--deploy_flag", type=str, help="a deploy flag whether to deploy or not" 45 | ) 46 | 47 | args = parser.parse_args() 48 | 49 | # Start Logging 50 | mlflow.start_run() 51 | 52 | logger.info(f"Running with arguments: {args}") 53 | 54 | # open metrics on both sides 55 | with open(os.path.join(args.baseline_metrics, "all_results.json")) as in_file: 56 | baseline_metrics = json.loads(in_file.read()) 57 | with open(os.path.join(args.candidate_metrics, "all_results.json")) as in_file: 58 | candidate_metrics = json.loads(in_file.read()) 59 | 60 | # should we ship or not? 
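    # Gate semantics: unless --force_comparison is set, the candidate ships
    # only when it strictly improves on the baseline for the reference metric
    # (default: predict_rougeLsum); a tie keeps deploy_flag False.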
61 | if args.force_comparison: 62 | deploy_flag = True 63 | else: 64 | deploy_flag = ( 65 | candidate_metrics[args.reference_metric] 66 | > baseline_metrics[args.reference_metric] 67 | ) 68 | 69 | logger.info("baseline_metrics[{}]={}, candidate_metrics[{}]={}, deploy_flag={} (force_comparison={})".format( 70 | args.reference_metric, 71 | baseline_metrics[args.reference_metric], 72 | args.reference_metric, 73 | candidate_metrics[args.reference_metric], 74 | deploy_flag, 75 | args.force_comparison 76 | )) 77 | 78 | # save deploy_flag as a file 79 | os.makedirs(args.deploy_flag, exist_ok=True) 80 | with open(os.path.join(args.deploy_flag, "deploy_flag"), "w") as out_file: 81 | out_file.write("%d" % int(deploy_flag)) 82 | 83 | # Stop Logging 84 | mlflow.end_run() 85 | 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/data-science/src/summarization/register.py: -------------------------------------------------------------------------------- 1 | from azureml.core import Run 2 | from azureml.core.model import Model 3 | 4 | import os 5 | import argparse 6 | import logging 7 | import mlflow 8 | 9 | 10 | def main(): 11 | """Main function of the script.""" 12 | # initialize root logger 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.INFO) 15 | console_handler = logging.StreamHandler() 16 | formatter = logging.Formatter( 17 | "%(asctime)s : %(levelname)s : %(name)s : %(message)s" 18 | ) 19 | console_handler.setFormatter(formatter) 20 | logger.addHandler(console_handler) 21 | 22 | # input and output arguments 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument( 25 | "--model_folder", 26 | type=str, 27 | required=True, 28 | help="folder containing model", 29 | ) 30 | parser.add_argument( 31 | "--register_as", 32 | type=str, 33 | required=True, 34 | help="name to use for model registration in AzureML", 35 | ) 36 | parser.add_argument( 37 | "--deploy_flag", type=str, required=True, help="a deploy flag whether to deploy or not" 38 | ) 39 | 40 | args = parser.parse_args() 41 | logger.info(f"Running with arguments: {args}") 42 | 43 | # Start Logging 44 | mlflow.start_run() 45 | 46 | if os.path.isfile(args.deploy_flag): 47 | deploy_flag_file_path = args.deploy_flag 48 | else: 49 | deploy_flag_file_path = os.path.join(args.deploy_flag, "deploy_flag") 50 | 51 | logger.info(f"Opening deploy_flag file from {deploy_flag_file_path}") 52 | with open(deploy_flag_file_path, 'rb') as in_file: 53 | deploy_flag = bool(int(in_file.read())) 54 | 55 | if deploy_flag: 56 | logger.info(f"Deploy flag is True, registering model as {args.register_as}...") 57 | run = Run.get_context() 58 | 59 | # if we're running locally, except 60 | if run.__class__.__name__ == "_OfflineRun": 61 | raise Exception("You can't run this script locally, you will need to run it as an AzureML job.") 62 | 63 | _ = Model.register( 64 | run.experiment.workspace, 65 | model_name=args.register_as, 66 | model_path=args.model_folder, 67 | tags={ 68 | "type": "huggingface", 69 | "task": "summarization" 70 | }, 71 | description="Huggingface model finetuned for summarization", 72 | ) 73 | else: 74 | logger.info(f"Deploy flag is False, pass.") 75 | 76 | # Stop Logging 77 | mlflow.end_run() 78 | 79 | 80 | if __name__ == "__main__": 81 | main() 82 | 83 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/data-science/src/summarization/score.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | from transformers import ( 5 | AutoModelForSeq2SeqLM, 6 | AutoTokenizer, 7 | ) 8 | 9 | def init(): 10 | """ 11 | This function is called when the container is initialized/started, typically after create/update of the deployment. 12 | You can write the logic here to perform init operations like caching the model in memory 13 | """ 14 | global model, tokenizer 15 | # AZUREML_MODEL_DIR is an environment variable created during deployment. 16 | # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION) 17 | model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), os.listdir(os.getenv("AZUREML_MODEL_DIR"))[0]) 18 | print("model_path") 19 | print(os.listdir(model_path)) 20 | model = AutoModelForSeq2SeqLM.from_pretrained(model_path) 21 | tokenizer = AutoTokenizer.from_pretrained(model_path) 22 | print("Init complete") 23 | 24 | 25 | def run(raw_data): 26 | """ 27 | This function is called for every invocation of the endpoint to perform the actual scoring/prediction. 28 | It extracts the article from the JSON input, runs the summarization model's generate() 29 | method, and returns the decoded summary 30 | """ 31 | global model, tokenizer 32 | logging.info("Request received") 33 | article = json.loads(raw_data)["data"] 34 | if "t5" in model.config.architectures[0].lower(): 35 | article = "summarize: " + article 36 | 37 | inputs = tokenizer(article, return_tensors="pt", max_length=512, truncation=True) 38 | outputs = model.generate( 39 | inputs["input_ids"], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True 40 | ) 41 | result = tokenizer.decode(outputs[0]) 42 | print(result) 43 | logging.info("Request processed") 44 | return result 45 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/mlops/azureml/deploy/online/online-deployment.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json 2 | 3 | # reference to the endpoint 4 | name: green 5 | endpoint_name: nlp-summarize-online 6 | 7 | # compute 8 | instance_type: STANDARD_DS5_V2 9 | instance_count: 1 10 | 11 | # model 12 | model: azureml:pubmed-summarization@latest 13 | 14 | # scoring code 15 | code_configuration: 16 | code: ../../../../data-science/src/summarization/ 17 | scoring_script: score.py 18 | 19 | # custom scoring environment 20 | environment: 21 | conda_file: ../../../../data-science/environments/inference/conda_env.yml 22 | image: mcr.microsoft.com/azureml/minimal-ubuntu18.04-py37-cpu-inference:latest 23 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/mlops/azureml/deploy/online/online-endpoint.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json 2 | name: nlp-summarize-online 3 | description: summarization model 4 | auth_mode: key 5 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/mlops/azureml/train/train-env.yml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 3 | name: nlp_summarization_train 4 | version: mlopsv2-july2022 5 | build: 6 | path:
../../../data-science/environments/training/ 7 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | name: deploy-model-training-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: aml-cli-v2 15 | 16 | trigger: none 17 | 18 | pool: 19 | vmImage: ubuntu-20.04 20 | 21 | resources: 22 | repositories: 23 | - repository: mlops-templates # Template Repo 24 | name: mlops-templates 25 | type: git 26 | ref: main 27 | 28 | stages: 29 | - stage: DeployTrainingPipeline 30 | displayName: Deploy Training Pipeline 31 | jobs: 32 | - job: DeployTrainingPipeline 33 | steps: 34 | - checkout: self 35 | path: s/ 36 | - task: Bash@3 37 | displayName: "Create checkout repository folder(s)" 38 | inputs: 39 | targetType: "inline" 40 | script: | 41 | set -e 42 | mkdir "$(Build.Repository.Name)" 43 | mkdir "mlops-templates" 44 | - checkout: mlops-templates 45 | path: s/templates/ 46 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 47 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 48 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 49 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 50 | parameters: 51 | cluster_name: cpu-cluster 52 | size: STANDARD_DS3_V2 53 | min_instances: 0 54 | max_instances: 1 55 | cluster_tier: dedicated 56 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 57 | parameters: 58 | cluster_name: cpu-cluster-lg 59 | size: Standard_D14_v2 60 | min_instances: 0 61 | max_instances: 1 62 | cluster_tier: dedicated 63 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 64 | parameters: 65 | cluster_name: gpu-cluster 66 | size: Standard_NV6 67 | min_instances: 0 68 | max_instances: 1 69 | cluster_tier: dedicated 70 | - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates 71 | parameters: 72 | environment_name: nlp_summarization_train 73 | environment_file: mlops/azureml/train/train-env.yml 74 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates 75 | parameters: 76 | pipeline_file: mlops/azureml/train/pipeline.yml 77 | experiment_name: $(environment)_nlp_summarization_$(Build.SourceBranchName) 78 | display_name: $(environment)_nlp_summarization_$(Build.BuildID) 79 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
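# NOTE: endpoint_name below is composed from $(namespace), $(postfix) and
# $(environment), which are expected to be defined in the config-infra-*.yml
# templates included under `variables:`; managed online endpoint names must be
# unique within an Azure region.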
3 | 4 | variables: 5 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 6 | # 'main' branch: PRD environment 7 | - template: ../../config-infra-prod.yml 8 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 9 | # 'develop' or feature branches: DEV environment 10 | - template: ../../config-infra-dev.yml 11 | - name: version 12 | value: aml-cli-v2 13 | - name: endpoint_name 14 | value: nlp-online-$(namespace)$(postfix)$(environment) 15 | - name: endpoint_type 16 | value: online 17 | 18 | trigger: none 19 | 20 | pool: 21 | vmImage: ubuntu-20.04 22 | 23 | resources: 24 | repositories: 25 | - repository: mlops-templates # Template Repo 26 | name: mlops-templates 27 | type: git 28 | ref: main 29 | 30 | stages: 31 | - stage: CreateOnlineEndpoint 32 | displayName: Create/Update Online Endpoint 33 | jobs: 34 | - job: DeployOnlineEndpoint 35 | steps: 36 | - checkout: self 37 | path: s/ 38 | - task: Bash@3 39 | displayName: "Create checkout repository folder(s)" 40 | inputs: 41 | targetType: "inline" 42 | script: | 43 | set -e 44 | mkdir "$(Build.Repository.Name)" 45 | mkdir "mlops-templates" 46 | - checkout: mlops-templates 47 | path: s/templates/ 48 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates 49 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates 50 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates 51 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates 52 | parameters: 53 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml 54 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates 55 | parameters: 56 | deployment_name: nlp-summarization-online-dp 57 | deployment_file: mlops/azureml/deploy/online/online-deployment.yml 58 | - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates 59 | parameters: 60 | traffic_allocation: nlp-summarization-online-dp=100 61 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates 62 | parameters: 63 | deployment_name: nlp-summarization-online-dp 64 | sample_request: data/nlp-summarization-request.json 65 | request_type: json 66 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/mlops/github-actions/deploy-model-training-pipeline.yml: -------------------------------------------------------------------------------- 1 | name: deploy-model-training-pipeline 2 | 3 | on: 4 | workflow_dispatch: 5 | jobs: 6 | get-config: 7 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main 8 | with: 9 | file_name: config-infra-prod.yml 10 | create-compute-standard: 11 | needs: get-config 12 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main 13 | with: 14 | cluster_name: cpu-cluster 15 | size: STANDARD_DS3_V2 16 | min_instances: 0 17 | max_instances: 1 18 | resource_group: ${{ needs.get-config.outputs.resource_group }} 19 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 20 | secrets: 21 | creds: ${{secrets.AZURE_CREDENTIALS}} 22 | create-compute-large: 23 | needs: get-config 24 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main 25 | with: 26 | cluster_name: cpu-cluster-lg 27 | size: Standard_D14_v2 28 | min_instances: 0 29 | max_instances: 1 30 | resource_group: ${{ needs.get-config.outputs.resource_group }} 31 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 32 | secrets: 33 | creds: 
${{secrets.AZURE_CREDENTIALS}} 34 | create-compute-gpu: 35 | needs: get-config 36 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main 37 | with: 38 | cluster_name: gpu-cluster 39 | size: Standard_NV6 40 | cluster_tier: low_priority 41 | min_instances: 0 42 | max_instances: 1 43 | resource_group: ${{ needs.get-config.outputs.resource_group }} 44 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 45 | secrets: 46 | creds: ${{secrets.AZURE_CREDENTIALS}} 47 | register-environment: 48 | needs: [get-config,create-compute-standard,create-compute-large,create-compute-gpu] 49 | uses: Azure/mlops-templates/.github/workflows/register-environment.yml@main 50 | with: 51 | resource_group: ${{ needs.get-config.outputs.resource_group }} 52 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 53 | environment_file: mlops/azureml/train/train-env.yml 54 | secrets: 55 | creds: ${{secrets.AZURE_CREDENTIALS}} 56 | run-pipeline: 57 | needs: [get-config,create-compute-standard,create-compute-large,create-compute-gpu,register-environment] 58 | uses: Azure/mlops-templates/.github/workflows/run-pipeline.yml@main 59 | with: 60 | resource_group: ${{ needs.get-config.outputs.resource_group }} 61 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 62 | parameters-file: mlops/azureml/train/pipeline.yml 63 | job-name: nlp_summarization 64 | secrets: 65 | creds: ${{secrets.AZURE_CREDENTIALS}} 66 | -------------------------------------------------------------------------------- /nlp/aml-cli-v2/mlops/github-actions/deploy-online-endpoint-pipeline.yml: -------------------------------------------------------------------------------- 1 | name: deploy-online-endpoint-pipeline 2 | 3 | on: 4 | workflow_dispatch: 5 | jobs: 6 | get-config: 7 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main 8 | with: 9 | file_name: config-infra-prod.yml 10 | create-endpoint: 11 | needs: get-config 12 | uses: Azure/mlops-templates/.github/workflows/create-endpoint.yml@main 13 | with: 14 | resource_group: ${{ needs.get-config.outputs.resource_group }} 15 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 16 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml 17 | endpoint_name: nlp-summarization-online-dp 18 | endpoint_type: online 19 | secrets: 20 | creds: ${{secrets.AZURE_CREDENTIALS}} 21 | create-deployment: 22 | uses: Azure/mlops-templates/.github/workflows/create-deployment.yml@main 23 | needs: [get-config,create-endpoint] 24 | with: 25 | resource_group: ${{ needs.get-config.outputs.resource_group }} 26 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 27 | endpoint_file: mlops/azureml/deploy/online/online-deployment.yml 28 | endpoint_name: nlp-summarization-online-dp 29 | endpoint_type: online 30 | deployment_name: nlp-summarization-online-dp 31 | secrets: 32 | creds: ${{secrets.AZURE_CREDENTIALS}} 33 | allocate-traffic: 34 | uses: Azure/mlops-templates/.github/workflows/allocate-traffic.yml@main 35 | needs: [get-config,create-deployment] 36 | with: 37 | resource_group: ${{ needs.get-config.outputs.resource_group }} 38 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }} 39 | traffic_allocation: nlp-summarization-online-dp=100 40 | endpoint_name: nlp-summarization-online-dp 41 | secrets: 42 | creds: ${{secrets.AZURE_CREDENTIALS}} 43 | -------------------------------------------------------------------------------- /nlp/python-sdk-v2/data-science/environments/inference/conda_env.yml:
-------------------------------------------------------------------------------- 1 | name: nlp_inference_conda_env 2 | channels: 3 | - pytorch 4 | - anaconda 5 | - defaults 6 | - conda-forge 7 | dependencies: 8 | - python=3.8 9 | - pip=21.2.4 10 | - pytorch=1.10.0 11 | - torchvision=0.11.1 12 | - torchaudio=0.10.0 13 | - cudatoolkit=11.1.1 14 | - nvidia-apex=0.1.0 15 | - gxx_linux-64=8.5.0 16 | - pip: 17 | - azureml-defaults==1.39.0 18 | - azureml-mlflow==1.39.0 19 | - azureml-telemetry==1.39.0 20 | - azureml-train-core==1.39.0 21 | - mlflow==1.24.0 22 | - transformers==4.17.0 23 | - 'inference-schema[numpy-support]==1.3.0' 24 | - applicationinsights==0.11.10 25 | -------------------------------------------------------------------------------- /nlp/python-sdk-v2/data-science/environments/training/Dockerfile: -------------------------------------------------------------------------------- 1 | # check release notes https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html 2 | FROM nvcr.io/nvidia/pytorch:22.04-py3 3 | 4 | # Install dependencies missing in this container 5 | # NOTE: container already has matplotlib==3.5.1 tqdm==4.62.0 6 | COPY requirements.txt ./ 7 | RUN pip install -r requirements.txt 8 | -------------------------------------------------------------------------------- /nlp/python-sdk-v2/data-science/environments/training/requirements.txt: -------------------------------------------------------------------------------- 1 | # data science requirements 2 | # torchvision==0.12.0 3 | # torch==1.11.0 4 | pytorch_lightning==1.6.4 5 | transformers==4.18.0 6 | datasets==2.3.2 7 | rouge_score==0.0.4 8 | sentencepiece==0.1.96 9 | 10 | # for metrics reporting/plotting 11 | mlflow==2.3.1 12 | azureml-mlflow==1.41.0 13 | # matplotlib==3.5.2 14 | # tqdm==4.64.0 15 | psutil==5.9.0 16 | 17 | # for unit testing 18 | pytest==7.1.2 19 | 20 | # for azure ml SDK v2 21 | azure-ai-ml==1.1.0 -------------------------------------------------------------------------------- /nlp/python-sdk-v2/data-science/src/summarization/compare.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging 4 | import mlflow 5 | import json 6 | from distutils.util import strtobool 7 | 8 | def main(): 9 | """Main function of the script.""" 10 | # initialize root logger 11 | logger = logging.getLogger() 12 | logger.setLevel(logging.INFO) 13 | console_handler = logging.StreamHandler() 14 | formatter = logging.Formatter( 15 | "%(asctime)s : %(levelname)s : %(name)s : %(message)s" 16 | ) 17 | console_handler.setFormatter(formatter) 18 | logger.addHandler(console_handler) 19 | 20 | # input and output arguments 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument( 23 | "--baseline_metrics", 24 | type=str, 25 | required=True, 26 | help="path to baseline metrics folder containing all_results.json", 27 | ) 28 | parser.add_argument( 29 | "--candidate_metrics", 30 | type=str, 31 | required=True, 32 | help="path to candidate metrics folder containing all_results.json", 33 | ) 34 | parser.add_argument( 35 | "--reference_metric", 36 | type=str, 37 | default="predict_rougeLsum", 38 | help="name of reference metric for shipping flag (default: predict_rougeLsum)", 39 | ) 40 | parser.add_argument( 41 | "--force_comparison", type=strtobool, default=False, help="set to True to bypass comparison and set --deploy_flag to True" 42 | ) 43 | parser.add_argument( 44 | "--deploy_flag", type=str, help="a deploy flag whether to deploy or 
not" 45 | ) 46 | 47 | args = parser.parse_args() 48 | 49 | # Start Logging 50 | mlflow.start_run() 51 | 52 | logger.info(f"Running with arguments: {args}") 53 | 54 | # open metrics on both sides 55 | with open(os.path.join(args.baseline_metrics, "all_results.json")) as in_file: 56 | baseline_metrics = json.loads(in_file.read()) 57 | with open(os.path.join(args.candidate_metrics, "all_results.json")) as in_file: 58 | candidate_metrics = json.loads(in_file.read()) 59 | 60 | # should we ship or not? 61 | if args.force_comparison: 62 | deploy_flag = True 63 | else: 64 | deploy_flag = ( 65 | candidate_metrics[args.reference_metric] 66 | > baseline_metrics[args.reference_metric] 67 | ) 68 | 69 | logger.info("baseline_metrics[{}]={}, candidate_metrics[{}]={}, deploy_flag={} (force_comparison={})".format( 70 | args.reference_metric, 71 | baseline_metrics[args.reference_metric], 72 | args.reference_metric, 73 | candidate_metrics[args.reference_metric], 74 | deploy_flag, 75 | args.force_comparison 76 | )) 77 | 78 | # save deploy_flag as a file 79 | os.makedirs(args.deploy_flag, exist_ok=True) 80 | with open(os.path.join(args.deploy_flag, "deploy_flag"), "w") as out_file: 81 | out_file.write("%d" % int(deploy_flag)) 82 | 83 | # Stop Logging 84 | mlflow.end_run() 85 | 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /nlp/python-sdk-v2/data-science/src/summarization/register.py: -------------------------------------------------------------------------------- 1 | from azureml.core import Run 2 | from azureml.core.model import Model 3 | 4 | import os 5 | import argparse 6 | import logging 7 | import mlflow 8 | 9 | 10 | def main(): 11 | """Main function of the script.""" 12 | # initialize root logger 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.INFO) 15 | console_handler = logging.StreamHandler() 16 | formatter = logging.Formatter( 17 | "%(asctime)s : %(levelname)s : %(name)s : %(message)s" 18 | ) 19 | console_handler.setFormatter(formatter) 20 | logger.addHandler(console_handler) 21 | 22 | # input and output arguments 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument( 25 | "--model_folder", 26 | type=str, 27 | required=True, 28 | help="folder containing model", 29 | ) 30 | parser.add_argument( 31 | "--register_as", 32 | type=str, 33 | required=True, 34 | help="name to use for model registration in AzureML", 35 | ) 36 | parser.add_argument( 37 | "--deploy_flag", type=str, required=True, help="a deploy flag whether to deploy or not" 38 | ) 39 | 40 | args = parser.parse_args() 41 | logger.info(f"Running with arguments: {args}") 42 | 43 | # Start Logging 44 | mlflow.start_run() 45 | 46 | if os.path.isfile(args.deploy_flag): 47 | deploy_flag_file_path = args.deploy_flag 48 | else: 49 | deploy_flag_file_path = os.path.join(args.deploy_flag, "deploy_flag") 50 | 51 | logger.info(f"Opening deploy_flag file from {deploy_flag_file_path}") 52 | with open(deploy_flag_file_path, 'rb') as in_file: 53 | deploy_flag = bool(int(in_file.read())) 54 | 55 | if deploy_flag: 56 | logger.info(f"Deploy flag is True, registering model as {args.register_as}...") 57 | run = Run.get_context() 58 | 59 | # if we're running locally, except 60 | if run.__class__.__name__ == "_OfflineRun": 61 | raise Exception("You can't run this script locally, you will need to run it as an AzureML job.") 62 | 63 | _ = Model.register( 64 | run.experiment.workspace, 65 | model_name=args.register_as, 66 | model_path=args.model_folder, 67 
| tags={ 68 | "type": "huggingface", 69 | "task": "summarization" 70 | }, 71 | description="Huggingface model finetuned for summarization", 72 | ) 73 | else: 74 | logger.info(f"Deploy flag is False, pass.") 75 | 76 | # Stop Logging 77 | mlflow.end_run() 78 | 79 | 80 | if __name__ == "__main__": 81 | main() 82 | 83 | -------------------------------------------------------------------------------- /nlp/python-sdk-v2/data-science/src/summarization/score.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | from transformers import ( 5 | AutoModelForSeq2SeqLM, 6 | AutoTokenizer, 7 | ) 8 | 9 | def init(): 10 | """ 11 | This function is called when the container is initialized/started, typically after create/update of the deployment. 12 | You can write the logic here to perform init operations like caching the model in memory 13 | """ 14 | global model, tokenizer 15 | # AZUREML_MODEL_DIR is an environment variable created during deployment. 16 | # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION) 17 | model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), os.listdir(os.getenv("AZUREML_MODEL_DIR"))[0]) 18 | print("model_path") 19 | print(os.listdir(model_path)) 20 | model = AutoModelForSeq2SeqLM.from_pretrained(model_path) 21 | tokenizer = AutoTokenizer.from_pretrained(model_path) 22 | print("Init complete") 23 | 24 | 25 | def run(raw_data): 26 | """ 27 | This function is called for every invocation of the endpoint to perform the actual scoring/prediction. 28 | It extracts the article from the JSON input, runs the summarization model's generate() 29 | method, and returns the decoded summary 30 | """ 31 | global model, tokenizer 32 | logging.info("Request received") 33 | article = json.loads(raw_data)["data"] 34 | if "t5" in model.config.architectures[0].lower(): 35 | article = "summarize: " + article 36 | 37 | inputs = tokenizer(article, return_tensors="pt", max_length=512, truncation=True) 38 | outputs = model.generate( 39 | inputs["input_ids"], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True 40 | ) 41 | result = tokenizer.decode(outputs[0]) 42 | print(result) 43 | logging.info("Request processed") 44 | return result 45 | -------------------------------------------------------------------------------- /nlp/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License.
3 | 4 | name: deploy-model-training-pipeline 5 | 6 | variables: 7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}: 8 | # 'main' branch: PRD environment 9 | - template: ../../config-infra-prod.yml 10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}: 11 | # 'develop' or feature branches: DEV environment 12 | - template: ../../config-infra-dev.yml 13 | - name: version 14 | value: python-sdk-v2 15 | 16 | trigger: none 17 | 18 | pool: 19 | vmImage: ubuntu-20.04 20 | 21 | resources: 22 | repositories: 23 | - repository: mlops-templates # Template Repo 24 | name: mlops-templates 25 | type: git 26 | ref: main 27 | 28 | stages: 29 | - stage: DeployTrainingPipeline 30 | displayName: Deploy Training Pipeline 31 | jobs: 32 | - job: DeployTrainingPipeline 33 | steps: 34 | - checkout: self 35 | path: s/ 36 | - task: Bash@3 37 | displayName: "Create checkout repository folder(s)" 38 | inputs: 39 | targetType: "inline" 40 | script: | 41 | set -e 42 | mkdir "$(Build.Repository.Name)" 43 | mkdir "mlops-templates" 44 | - checkout: mlops-templates 45 | path: s/templates/ 46 | - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates 47 | - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates 48 | - template: templates/python-sdk-v2/install-requirements.yml@mlops-templates 49 | - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates 50 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 51 | parameters: 52 | cluster_name: cpu-cluster 53 | size: STANDARD_DS3_V2 54 | min_instances: 0 55 | max_instances: 1 56 | cluster_tier: dedicated 57 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 58 | parameters: 59 | cluster_name: cpu-cluster-lg 60 | size: Standard_D14_v2 61 | min_instances: 0 62 | max_instances: 1 63 | cluster_tier: dedicated 64 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates 65 | parameters: 66 | cluster_name: gpu-cluster 67 | size: Standard_NV6 68 | min_instances: 0 69 | max_instances: 1 70 | cluster_tier: dedicated 71 | - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates 72 | parameters: 73 | build_type: docker 74 | environment_name: nlp_summarization_train 75 | environment_path: data-science/environments/training 76 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates 77 | parameters: 78 | pipeline_path: mlops/azureml/train/pipeline-train.py 79 | experiment_name: $(environment)_nlp_summarization_$(Build.SourceBranchName) 80 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | black==22.3.0 2 | flake8==4.0.1 3 | isort==5.10.1 4 | pre-commit==2.19.0 5 | --------------------------------------------------------------------------------
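As a quick smoke test of the managed online endpoints deployed above, the scoring contract in score.py (a JSON body with a "data" field, as in data/nlp-summarization-request.json) can also be exercised directly. A minimal sketch only, assuming the requests package (not pinned in this repo) and placeholder values for the scoring URI and key (obtainable with `az ml online-endpoint get-credentials`):

import json

import requests  # assumed dependency

# Placeholders; the real values come from the deployed endpoint.
SCORING_URI = "https://nlp-summarize-online.westeurope.inference.ml.azure.com/score"
API_KEY = "<endpoint-key>"

with open("data/nlp-summarization-request.json") as f:
    payload = json.load(f)  # expected shape: {"data": "<article text>"}

response = requests.post(
    SCORING_URI,
    json=payload,
    headers={"Authorization": f"Bearer {API_KEY}"},
)
response.raise_for_status()
print(response.json())  # the summary string returned by score.py's run()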