├── .devcontainer
│   ├── Dockerfile
│   └── devcontainer.json
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── config.yml
│   │   ├── repository-issue.md
│   │   └── solution-accelerator-request.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows
│       └── codeql.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── SECURITY.md
├── SUPPORT.md
├── classical
│   ├── README.md
│   ├── aml-cli-v2
│   │   ├── data-science
│   │   │   ├── environment
│   │   │   │   └── train-conda.yml
│   │   │   └── src
│   │   │       ├── evaluate.py
│   │   │       ├── prep.py
│   │   │       ├── register.py
│   │   │       └── train.py
│   │   ├── data
│   │   │   ├── taxi-batch.csv
│   │   │   ├── taxi-data.csv
│   │   │   └── taxi-request.json
│   │   └── mlops
│   │       ├── azureml
│   │       │   ├── deploy
│   │       │   │   ├── batch
│   │       │   │   │   ├── batch-deployment.yml
│   │       │   │   │   └── batch-endpoint.yml
│   │       │   │   └── online
│   │       │   │       ├── online-deployment.yml
│   │       │   │       ├── online-endpoint.yml
│   │       │   │       └── score.py
│   │       │   └── train
│   │       │       ├── data.yml
│   │       │       ├── pipeline.yml
│   │       │       └── train-env.yml
│   │       ├── devops-pipelines
│   │       │   ├── deploy-batch-endpoint-pipeline.yml
│   │       │   ├── deploy-model-training-pipeline.yml
│   │       │   └── deploy-online-endpoint-pipeline.yml
│   │       └── github-actions
│   │           ├── deploy-batch-endpoint-pipeline-classical.yml
│   │           ├── deploy-model-training-pipeline-classical.yml
│   │           └── deploy-online-endpoint-pipeline-classical.yml
│   ├── python-sdk-v1
│   │   ├── config-aml.yml
│   │   ├── data-science
│   │   │   ├── environment
│   │   │   │   ├── batch.yml
│   │   │   │   ├── batch_monitor.yml
│   │   │   │   ├── train.yml
│   │   │   │   └── train_monitor.yml
│   │   │   ├── notebooks
│   │   │   │   └── experiment1.ipynb
│   │   │   ├── src
│   │   │   │   ├── evaluate.py
│   │   │   │   ├── prep.py
│   │   │   │   ├── score.py
│   │   │   │   └── train.py
│   │   │   └── tests
│   │   │       └── test.py
│   │   ├── data
│   │   │   ├── scoring
│   │   │   │   └── credit_batch.csv
│   │   │   └── training
│   │   │       └── credit.csv
│   │   └── mlops
│   │       └── devops-pipelines
│   │           ├── deploy-drift-detection.yml
│   │           ├── deploy-model-batch-scoring.yml
│   │           └── deploy-model-training-pipeline.yml
│   ├── python-sdk-v2
│   │   ├── data-science
│   │   │   ├── environment
│   │   │   │   └── train-conda.yml
│   │   │   └── src
│   │   │       ├── evaluate
│   │   │       │   └── evaluate.py
│   │   │       ├── prep
│   │   │       │   └── prep.py
│   │   │       ├── register
│   │   │       │   └── register.py
│   │   │       └── train
│   │   │           └── train.py
│   │   ├── data
│   │   │   ├── taxi-batch.csv
│   │   │   ├── taxi-data.csv
│   │   │   └── taxi-request.json
│   │   └── mlops
│   │       ├── azureml
│   │       │   └── train
│   │       │       └── run_pipeline.py
│   │       └── devops-pipelines
│   │           ├── deploy-batch-endpoint-pipeline.yml
│   │           ├── deploy-model-training-pipeline.yml
│   │           └── deploy-online-endpoint-pipeline.yml
│   └── rai-aml-cli-v2
│       ├── data-science
│       │   ├── environment
│       │   │   ├── train-conda.yml
│       │   │   └── train-requirements.txt
│       │   ├── experiment
│       │   │   ├── evaluate.ipynb
│       │   │   ├── prep.ipynb
│       │   │   ├── register.ipynb
│       │   │   ├── requirements.txt
│       │   │   └── train.ipynb
│       │   └── src
│       │       ├── evaluate
│       │       │   ├── evaluate.py
│       │       │   └── test_evaluate.py
│       │       ├── prep
│       │       │   ├── prep.py
│       │       │   └── test_prep.py
│       │       ├── register
│       │       │   └── register.py
│       │       └── train
│       │           ├── test_train.py
│       │           └── train.py
│       ├── data
│       │   ├── taxi-batch.csv
│       │   ├── taxi-data.csv
│       │   └── taxi-request.json
│       └── mlops
│           ├── azureml
│           │   ├── deploy
│           │   │   ├── batch
│           │   │   │   ├── batch-deployment.yml
│           │   │   │   └── batch-endpoint.yml
│           │   │   └── online
│           │   │       ├── online-deployment.yml
│           │   │       ├── online-endpoint.yml
│           │   │       └── score.py
│           │   └── train
│           │       ├── pipeline.yml
│           │       └── train-env.yml
│           └── devops-pipelines
│               ├── deploy-batch-endpoint-pipeline.yml
│               ├── deploy-model-training-pipeline.yml
│               ├── deploy-online-endpoint-pipeline.yml
│               ├── register-rai-components.yml
│               └── trigger_.code-search
├── config-infra-dev.yml
├── config-infra-prod.yml
├── cv
│   ├── README.md
│   ├── aml-cli-v2
│   │   ├── data-science
│   │   │   ├── environment
│   │   │   │   ├── Dockerfile
│   │   │   │   ├── ndv4-topo.xml
│   │   │   │   └── requirements.txt
│   │   │   ├── requirements-tests.txt
│   │   │   ├── src
│   │   │   │   ├── image_io.py
│   │   │   │   ├── model
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── model_loader.py
│   │   │   │   │   ├── swin_models.py
│   │   │   │   │   ├── test_model.py
│   │   │   │   │   └── torchvision_models.py
│   │   │   │   ├── profiling.py
│   │   │   │   └── train.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       ├── model
│   │   │       │   └── test_model_loader.py
│   │   │       └── test_train.py
│   │   ├── data
│   │   │   └── sample-request.json
│   │   └── mlops
│   │       ├── azureml
│   │       │   ├── deploy
│   │       │   │   └── online
│   │       │   │       ├── online-deployment.yml
│   │       │   │       └── online-endpoint.yml
│   │       │   └── train
│   │       │       ├── create_stanford_dogs_dataset.yaml
│   │       │       ├── pipeline.yaml
│   │       │       ├── train-env.yaml
│   │       │       └── train.yaml
│   │       ├── devops-pipelines
│   │       │   ├── deploy-model-training-pipeline.yml
│   │       │   └── deploy-online-endpoint-pipeline.yml
│   │       └── github-actions
│   │           ├── deploy-model-training-pipeline.yml
│   │           └── deploy-online-endpoint-pipeline.yml
│   └── python-sdk-v1
│       ├── config-aml.yml
│       ├── data-science
│       │   ├── environment
│       │   │   └── training
│       │   │       ├── azureml_environment.json
│       │   │       └── conda_dependencies.yml
│       │   ├── notebooks
│       │   │   └── .gitkeep
│       │   ├── src
│       │   │   ├── evaluate.py
│       │   │   ├── model
│       │   │   │   ├── __init__.py
│       │   │   │   ├── dataset.py
│       │   │   │   └── net.py
│       │   │   ├── prep.py
│       │   │   └── train.py
│       │   └── tests
│       │       └── .gitkeep
│       ├── data
│       │   └── training
│       │       └── image_labels.csv
│       └── mlops
│           └── devops-pipelines
│               └── deploy-model-training-pipeline.yml
├── environment.yml
├── infrastructure
│   ├── bicep
│   │   ├── bicepconfig.json
│   │   ├── main.bicep
│   │   ├── main.json
│   │   ├── modules
│   │   │   ├── aml_computecluster.bicep
│   │   │   ├── aml_workspace.bicep
│   │   │   ├── application_insights.bicep
│   │   │   ├── container_registry.bicep
│   │   │   ├── key_vault.bicep
│   │   │   └── storage_account.bicep
│   │   └── pipelines
│   │       └── bicep-ado-deploy-infra.yml
│   └── terraform
│       ├── aml_deploy.tf
│       ├── devops-pipelines
│       │   └── tf-ado-deploy-infra.yml
│       ├── github-actions
│       │   └── tf-gha-deploy-infra.yml
│       ├── locals.tf
│       ├── main.tf
│       ├── modules
│       │   ├── aml-workspace
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── application-insights
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── container-registry
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── data-explorer
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── key-vault
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   ├── resource-group
│       │   │   ├── main.tf
│       │   │   ├── outputs.tf
│       │   │   └── variables.tf
│       │   └── storage-account
│       │       ├── main.tf
│       │       ├── outputs.tf
│       │       └── variables.tf
│       └── variables.tf
├── nlp
│   ├── README.md
│   ├── aml-cli-v2
│   │   ├── data-science
│   │   │   ├── environments
│   │   │   │   ├── inference
│   │   │   │   │   └── conda_env.yml
│   │   │   │   └── training
│   │   │   │       ├── Dockerfile
│   │   │   │       └── requirements.txt
│   │   │   └── src
│   │   │       └── summarization
│   │   │           ├── compare.py
│   │   │           ├── prepare.py
│   │   │           ├── register.py
│   │   │           ├── run.py
│   │   │           └── score.py
│   │   ├── data
│   │   │   └── nlp-summarization-request.json
│   │   └── mlops
│   │       ├── azureml
│   │       │   ├── deploy
│   │       │   │   └── online
│   │       │   │       ├── online-deployment.yml
│   │       │   │       └── online-endpoint.yml
│   │       │   └── train
│   │       │       ├── pipeline.yml
│   │       │       └── train-env.yml
│   │       ├── devops-pipelines
│   │       │   ├── deploy-model-training-pipeline.yml
│   │       │   └── deploy-online-endpoint-pipeline.yml
│   │       └── github-actions
│   │           ├── deploy-model-training-pipeline.yml
│   │           └── deploy-online-endpoint-pipeline.yml
│   └── python-sdk-v2
│       ├── data-science
│       │   ├── environments
│       │   │   ├── inference
│       │   │   │   └── conda_env.yml
│       │   │   └── training
│       │   │       ├── Dockerfile
│       │   │       └── requirements.txt
│       │   └── src
│       │       └── summarization
│       │           ├── compare.py
│       │           ├── prepare.py
│       │           ├── register.py
│       │           ├── run.py
│       │           └── score.py
│       ├── data
│       │   └── nlp-summarization-request.json
│       └── mlops
│           ├── azureml
│           │   └── train
│           │       └── pipeline-train.py
│           └── devops-pipelines
│               └── deploy-model-training-pipeline.yml
└── requirements.txt
/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
1 | # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.209.6/containers/python-3-miniconda/.devcontainer/base.Dockerfile
2 | FROM mcr.microsoft.com/vscode/devcontainers/miniconda:0.202.1-3
3 |
4 | # Update the conda environment according to the environment.yml file in the project.
5 | COPY environment.yml /tmp/conda-tmp/
6 | RUN /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml && rm -rf /tmp/conda-tmp
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.222.0/containers/python-3-miniconda
3 | {
4 |     "name": "Miniconda (Python 3)",
5 |     "build": {
6 |         "context": "..",
7 |         "dockerfile": "Dockerfile",
8 |     },
9 |     // Set *default* container specific settings.json values on container create.
10 |     "settings": {
11 |         "python.defaultInterpreterPath": "/opt/conda/bin/python",
12 |     },
13 |     // Add the IDs of extensions you want installed when the container is created.
14 |     "extensions": [
15 |         "ms-python.python",
16 |         "ms-python.vscode-pylance",
17 |         "ms-toolsai.vscode-ai",
18 |     ],
19 |     // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
20 |     "remoteUser": "vscode",
21 |     "features": {
22 |         "azure-cli": "latest"
23 |     },
24 |     "onCreateCommand": "az extension add -n ml -y"
25 | }
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 |   - name: MLOps v2 solution accelerators discussions.
4 |     url: https://github.com/azure/mlops-v2/discussions
5 |     about: >-
6 |       Please ask questions and start open-ended discussions here.
7 |       Use issues for well-defined work in the solution accelerator repositories.
8 |   - name: Azure ML CLI issues.
9 |     url: https://github.com/azure/azure-cli-extensions/issues/new/choose
10 |     about: Please open issues with the Azure ML CLI extension here.
11 |   - name: Azure ML Python SDK issues.
12 |     url: https://github.com/azure/azure-sdk-for-python/issues/new/choose
13 |     about: Please open issues with the Azure ML Python SDK here.
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/repository-issue.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Suggest an enhancement for this repository.
3 | about: Have an idea for improvements to this repository?
4 | title: '[repo] '
5 | labels: ''
6 | assignees: ''
7 | ---
8 |
9 | ## Why?
10 |
11 |
12 |
13 | ## How?
14 |
15 |
16 |
17 | ## Anything else?
18 |
19 |
22 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/solution-accelerator-request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Request or suggest a new solution accelerator.
3 | about: Have an idea for a new solution accelerator?
4 | title: '[new accelerator] '
5 | labels: ''
6 | assignees: ''
7 | ---
8 |
9 | ## Why doesn't an existing solution accelerator work?
10 |
11 |
12 |
13 | ## What work is needed?
14 |
15 |
20 |
21 | ## Anything else?
22 |
23 |
26 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # PR into Azure/mlops-project-template
2 |
3 | ## Checklist
4 |
5 | I have:
6 |
7 | - [ ] read and followed the contributing guidelines
8 |
9 | ## Changes
10 |
11 | -
12 |
13 | fixes #
14 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 |
3 | on:
4 |   push:
5 |     branches: [ "main", main*, feature* ]
6 |   pull_request:
7 |     # The branches below must be a subset of the branches above
8 |     branches: [ "main" ]
9 |   schedule:
10 |     - cron: '0 3 * * 3'
11 |
12 | jobs:
13 |   analyze:
14 |     name: Analyze
15 |     runs-on: ubuntu-latest
16 |     permissions:
17 |       actions: read
18 |       contents: read
19 |       security-events: write
20 |
21 |     strategy:
22 |       fail-fast: false
23 |       matrix:
24 |         language: [ 'python' ]
25 |
26 |     steps:
27 |       - name: Checkout repository
28 |         uses: actions/checkout@v3
29 |
30 |       # Initializes the CodeQL tools for scanning.
31 |       - name: Initialize CodeQL
32 |         uses: github/codeql-action/init@v2
33 |         with:
34 |           languages: ${{ matrix.language }}
35 |           queries: security-and-quality
36 |
37 |       - name: Perform CodeQL Analysis
38 |         uses: github/codeql-action/analyze@v2
39 |         with:
40 |           category: "/language:${{matrix.language}}"
41 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # Mac stuff
7 | .DS_Store
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | pip-wheel-metadata/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
92 | # install all needed dependencies.
93 | #Pipfile.lock
94 |
95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96 | __pypackages__/
97 |
98 | # Celery stuff
99 | celerybeat-schedule
100 | celerybeat.pid
101 |
102 | # SageMath parsed files
103 | *.sage.py
104 |
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 |
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 |
118 | # Rope project settings
119 | .ropeproject
120 |
121 | # mkdocs documentation
122 | /site
123 |
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 |
129 | # Pyre type checker
130 | .pyre/
131 |
132 | # Terraform
133 | .terraform.lock.hcl
134 | terraform.tfstate
135 | terraform.tfstate.backup
136 | .terraform.tfstate.lock.info
137 | .terraform
138 | terraform.tfvars
139 |
140 | /infrastructure/bicep/main.json
141 | !/infrastructure/bicep/bicepconfig.json
142 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
3 |     rev: v4.2.0
4 |     hooks:
5 |       - id: check-yaml
6 |       - id: end-of-file-fixer
7 |       - id: trailing-whitespace
8 |
9 |   # Opinionated code formatter to forget about formatting
10 |   - repo: https://github.com/psf/black
11 |     rev: 21.12b0
12 |     hooks:
13 |       - id: black
14 |         additional_dependencies: ['click==8.0.4']
15 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Microsoft Open Source Code of Conduct
2 |
3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4 |
5 | Resources:
6 |
7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Azure MLOps (v2) solution accelerator
2 |
3 | [Main README file](https://github.com/Azure/mlops-v2/blob/main/README.md)
4 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Security
4 |
5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
6 |
7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
8 |
9 | ## Reporting Security Issues
10 |
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 |
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
14 |
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
16 |
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 |
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 |
21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | * Full paths of source file(s) related to the manifestation of the issue
23 | * The location of the affected source code (tag/branch/commit or direct URL)
24 | * Any special configuration required to reproduce the issue
25 | * Step-by-step instructions to reproduce the issue
26 | * Proof-of-concept or exploit code (if possible)
27 | * Impact of the issue, including how an attacker might exploit the issue
28 |
29 | This information will help us triage your report more quickly.
30 |
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
32 |
33 | ## Preferred Languages
34 |
35 | We prefer all communications to be in English.
36 |
37 | ## Policy
38 |
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
40 |
41 |
42 |
--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
1 | # TODO: The maintainer of this repo has not yet edited this file
2 |
3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
4 |
5 | - **No CSS support:** Fill out this template with information about how to file issues and get help.
6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps.
7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide.
8 |
9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
10 |
11 | # Support
12 |
13 | ## How to file issues and get help
14 |
15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing
16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or
17 | feature request as a new Issue.
18 |
19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
22 |
23 | ## Microsoft Support Policy
24 |
25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
26 |
--------------------------------------------------------------------------------
/classical/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | # Azure MLOps (v2) Pattern: Azure Machine Learning - Classical Machine Learning
4 |
5 | This repository includes the use-case-specific codebase deployed as the inner loop for the [MLOps v2](https://github.com/Azure/mlops-v2) solution accelerator.
6 |
7 | The repo itself functions as a standalone entity that agnostically holds all Azure Machine Learning - Classical Machine Learning requirements for this architectural pattern.
8 |
9 |
10 | ## 📐 Pattern Architectures: Key concepts
11 |
12 | This repository follows the architecture linked below:
13 |
14 | | Link | AI Pattern |
15 | | ------------------------------------------------------- | ----------------------------------------------------------------------- |
16 | | [Pattern AML CML](https://github.com/Azure/mlops-v2/blob/main/documentation/architecturepattern/AzureML_CML_Architecture.png) | Azure Machine Learning - Classical Machine Learning |
17 |
18 |
19 | ## 👤 Getting started
20 |
21 | Please visit [MLOps v2](https://github.com/Azure/mlops-v2) for the initial deployment of this inner loop pattern.
22 |
23 |
24 | ## ‼️ Feedback or Issues
25 |
26 | Please visit [MLOps v2](https://github.com/Azure/mlops-v2) and file an **issue**, or go to Microsoft's internal SharePoint site to hand in any feedback.
27 |
28 |
29 | ## Contributing
30 |
31 | This project welcomes contributions and suggestions. To learn more visit the contributing section in the [MLOps v2](https://github.com/Azure/mlops-v2) solution accelerator.
32 |
33 | Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
34 |
35 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
36 |
37 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
38 |
39 |
40 | ## Trademarks
41 |
42 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
43 | trademarks or logos is subject to and must follow
44 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/legal/intellectualproperty/trademarks/usage/general).
45 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
46 | Any use of third-party trademarks or logos is subject to those third parties' policies.
47 |
--------------------------------------------------------------------------------
/classical/aml-cli-v2/data-science/environment/train-conda.yml:
--------------------------------------------------------------------------------
1 | channels:
2 |   - defaults
3 |   - anaconda
4 |   - conda-forge
5 | dependencies:
6 |   - python=3.7.5
7 |   - pip
8 |   - pip:
9 |       - azureml-mlflow==1.38.0
10 |       - azure-ai-ml==1.0.0
11 |       - pyarrow==10.0.0
12 |       - scikit-learn==0.24.1
13 |       - pandas==1.2.1
14 |       - joblib==1.0.0
15 |       - matplotlib==3.3.3
16 |       - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
17 |       - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
--------------------------------------------------------------------------------
/classical/aml-cli-v2/data-science/src/register.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 | """
4 | Registers trained ML model if deploy flag is True.
5 | """
6 |
7 | import argparse
8 | from pathlib import Path
9 | import pickle
10 | import mlflow
11 |
12 | import os
13 | import json
14 |
15 | def parse_args():
16 |     '''Parse input arguments'''
17 |
18 |     parser = argparse.ArgumentParser()
19 |     parser.add_argument('--model_name', type=str, help='Name under which model will be registered')
20 |     parser.add_argument('--model_path', type=str, help='Model directory')
21 |     parser.add_argument('--evaluation_output', type=str, help='Path of eval results')
22 |     parser.add_argument(
23 |         "--model_info_output_path", type=str, help="Path to write model info JSON"
24 |     )
25 |     args, _ = parser.parse_known_args()
26 |     print(f'Arguments: {args}')
27 |
28 |     return args
29 |
30 |
31 | def main(args):
32 |     '''Loads model, registers it if deploy flag is True'''
33 |
34 |     with open((Path(args.evaluation_output) / "deploy_flag"), 'rb') as infile:
35 |         deploy_flag = int(infile.read())
36 |
37 |     mlflow.log_metric("deploy flag", int(deploy_flag))
38 |
39 |     if deploy_flag==1:
40 |
41 |         print("Registering ", args.model_name)
42 |
43 |         # load model
44 |         model = mlflow.sklearn.load_model(args.model_path)
45 |
46 |         # log model using mlflow
47 |         mlflow.sklearn.log_model(model, args.model_name)
48 |
49 |         # register logged model using mlflow
50 |         run_id = mlflow.active_run().info.run_id
51 |         model_uri = f'runs:/{run_id}/{args.model_name}'
52 |         mlflow_model = mlflow.register_model(model_uri, args.model_name)
53 |         model_version = mlflow_model.version
54 |
55 |         # write model info
56 |         print("Writing JSON")
57 |         model_info = {"id": "{0}:{1}".format(args.model_name, model_version)}
58 |         output_path = os.path.join(args.model_info_output_path, "model_info.json")
59 |         with open(output_path, "w") as of:
60 |             json.dump(model_info, fp=of)
61 |
62 |     else:
63 |         print("Model will not be registered!")
64 |
65 | if __name__ == "__main__":
66 |
67 |     mlflow.start_run()
68 |
69 |     # ---------- Parse Arguments ----------- #
70 |     # -------------------------------------- #
71 |
72 |     args = parse_args()
73 |
74 |     lines = [
75 |         f"Model name: {args.model_name}",
76 |         f"Model path: {args.model_path}",
77 |         f"Evaluation output path: {args.evaluation_output}",
78 |     ]
79 |
80 |     for line in lines:
81 |         print(line)
82 |
83 |     main(args)
84 |
85 |     mlflow.end_run()
86 |
--------------------------------------------------------------------------------
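A minimal sketch of the contract register.py relies on: the upstream evaluate step writes a "deploy_flag" file containing 0 or 1 into its output folder, and register.py reads it back with int(). The helper below is hypothetical and only illustrates that file format:

    from pathlib import Path

    def write_deploy_flag(evaluation_output: str, deploy: bool) -> None:
        # register.py opens this file and calls int() on its contents,
        # so a bare "0" or "1" (a trailing newline is tolerated) suffices.
        (Path(evaluation_output) / "deploy_flag").write_text(str(int(deploy)))

    write_deploy_flag("evaluation_output", deploy=True)
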
/classical/aml-cli-v2/data/taxi-request.json:
--------------------------------------------------------------------------------
1 | {"input_data": [[2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57],
2 | [3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35]]
3 | }
--------------------------------------------------------------------------------
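A hedged sketch of how this payload could be posted to the matching online endpoint once it is deployed; the scoring URI and key below are placeholders, not values from this repository:

    import json
    import requests

    scoring_uri = "https://<endpoint-name>.<region>.inference.ml.azure.com/score"  # placeholder
    api_key = "<endpoint-key>"  # placeholder

    with open("data/taxi-request.json") as f:
        payload = json.load(f)

    response = requests.post(
        scoring_uri,
        json=payload,
        headers={"Authorization": f"Bearer {api_key}"},
    )
    print(response.json())  # one predicted fare per row in "input_data"
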
/classical/aml-cli-v2/mlops/azureml/deploy/batch/batch-deployment.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
2 | name: batch-dp
3 | endpoint_name: taxi-fare-batch
4 | model: azureml:taxi-model@latest
5 | compute: azureml:batch-cluster
6 | resources:
7 |   instance_count: 1
8 | max_concurrency_per_instance: 2
9 | mini_batch_size: 10
10 | output_action: append_row
11 | output_file_name: predictions.csv
12 | retry_settings:
13 |   max_retries: 3
14 |   timeout: 30
15 | error_threshold: -1
16 | logging_level: info
--------------------------------------------------------------------------------
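The devops pipelines below drive this deployment through shared templates; as a minimal local sketch (assuming the azure-ai-ml v2 SDK, with placeholder workspace coordinates), the same deployment could be invoked directly:

    from azure.ai.ml import Input, MLClient
    from azure.identity import DefaultAzureCredential

    ml_client = MLClient(
        DefaultAzureCredential(),
        subscription_id="<subscription-id>",      # placeholder
        resource_group_name="<resource-group>",   # placeholder
        workspace_name="<workspace-name>",        # placeholder
    )

    # Score data/taxi-batch.csv against the "batch-dp" deployment defined above.
    job = ml_client.batch_endpoints.invoke(
        endpoint_name="taxi-fare-batch",
        deployment_name="batch-dp",
        input=Input(type="uri_file", path="data/taxi-batch.csv"),
    )
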
/classical/aml-cli-v2/mlops/azureml/deploy/batch/batch-endpoint.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/batchEndpoint.schema.json
2 | name: taxi-fare-batch
3 | description: taxi cost batch endpoint
4 | auth_mode: aad_token
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/azureml/deploy/online/online-deployment.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
2 | name: blue
3 | endpoint_name: taxi-fare-online
4 | model: azureml:taxi-model@latest
5 | instance_type: Standard_DS3_v2
6 | instance_count: 1
7 |
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/azureml/deploy/online/online-endpoint.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
2 | name: taxi-fare-online
3 | description: taxi cost online endpoint
4 | auth_mode: key
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/azureml/deploy/online/score.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/classical/aml-cli-v2/mlops/azureml/deploy/online/score.py
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/azureml/train/data.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/data.schema.json
2 |
3 | # Supported paths include:
4 | # local: ./<path>
5 | # blob: https://<account_name>.blob.core.windows.net/<container_name>/<path>
6 | # ADLS gen2: abfss://<file_system>@<account_name>.dfs.core.windows.net/<path>
7 | # Datastore: azureml://datastores/<datastore_name>/paths/<path>
8 | type: uri_file
9 | name: taxi-data
10 | description: taxi dataset
11 | path: ../../../data/taxi-data.csv
--------------------------------------------------------------------------------
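The CI pipelines register this asset through the register-data template; a minimal equivalent sketch with the azure-ai-ml v2 SDK (workspace coordinates are placeholders):

    from azure.ai.ml import MLClient, load_data
    from azure.identity import DefaultAzureCredential

    ml_client = MLClient(
        DefaultAzureCredential(),
        subscription_id="<subscription-id>",      # placeholder
        resource_group_name="<resource-group>",   # placeholder
        workspace_name="<workspace-name>",        # placeholder
    )

    data_asset = load_data("mlops/azureml/train/data.yml")  # parses the YAML above
    ml_client.data.create_or_update(data_asset)             # registers "taxi-data"
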
/classical/aml-cli-v2/mlops/azureml/train/pipeline.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
2 | type: pipeline
3 | experiment_name: taxi-fare-training
4 |
5 | description: Training Pipeline to train a model that predicts taxi fare price
6 |
7 | # <inputs_and_outputs>
8 | inputs:
9 |   input:
10 |     type: uri_file
11 |     path: azureml:taxi-data@latest
12 |   enable_monitoring: 'false'
13 |   table_name: 'taximonitoring'
14 |
15 | outputs:
16 |   train_data:
17 |   val_data:
18 |   test_data:
19 |   trained_model:
20 |   evaluation_output:
21 |   model_info_output_path:
22 | # </inputs_and_outputs>
23 |
24 | # <jobs>
25 | settings:
26 |   default_datastore: azureml:workspaceblobstore
27 |   default_compute: azureml:cpu-cluster
28 |   continue_on_step_failure: false
29 |
30 | jobs:
31 |   prep_data:
32 |     name: prep_data
33 |     display_name: prep-data
34 |     code: ../../../data-science/src
35 |     command: >-
36 |       python prep.py
37 |       --raw_data ${{inputs.raw_data}}
38 |       --train_data ${{outputs.train_data}}
39 |       --val_data ${{outputs.val_data}}
40 |       --test_data ${{outputs.test_data}}
41 |       --enable_monitoring ${{inputs.enable_monitoring}}
42 |       --table_name ${{inputs.table_name}}
43 |     environment: azureml:taxi-train-env@latest
44 |     inputs:
45 |       raw_data: ${{parent.inputs.input}}
46 |       enable_monitoring: ${{parent.inputs.enable_monitoring}}
47 |       table_name: ${{parent.inputs.table_name}}
48 |     outputs:
49 |       train_data: ${{parent.outputs.train_data}}
50 |       val_data: ${{parent.outputs.val_data}}
51 |       test_data: ${{parent.outputs.test_data}}
52 |
53 |   train_model:
54 |     name: train_model
55 |     display_name: train-model
56 |     code: ../../../data-science/src
57 |     command: >-
58 |       python train.py
59 |       --train_data ${{inputs.train_data}}
60 |       --model_output ${{outputs.model_output}}
61 |     environment: azureml:taxi-train-env@latest
62 |     inputs:
63 |       train_data: ${{parent.jobs.prep_data.outputs.train_data}}
64 |     outputs:
65 |       model_output: ${{parent.outputs.trained_model}}
66 |
67 |   evaluate_model:
68 |     name: evaluate_model
69 |     display_name: evaluate-model
70 |     code: ../../../data-science/src
71 |     command: >-
72 |       python evaluate.py
73 |       --model_name ${{inputs.model_name}}
74 |       --model_input ${{inputs.model_input}}
75 |       --test_data ${{inputs.test_data}}
76 |       --evaluation_output ${{outputs.evaluation_output}}
77 |     environment: azureml:taxi-train-env@latest
78 |     inputs:
79 |       model_name: "taxi-model"
80 |       model_input: ${{parent.jobs.train_model.outputs.model_output}}
81 |       test_data: ${{parent.jobs.prep_data.outputs.test_data}}
82 |     outputs:
83 |       evaluation_output: ${{parent.outputs.evaluation_output}}
84 |
85 |   register_model:
86 |     name: register_model
87 |     display_name: register-model
88 |     code: ../../../data-science/src
89 |     command: >-
90 |       python register.py
91 |       --model_name ${{inputs.model_name}}
92 |       --model_path ${{inputs.model_path}}
93 |       --evaluation_output ${{inputs.evaluation_output}}
94 |       --model_info_output_path ${{outputs.model_info_output_path}}
95 |     environment: azureml:taxi-train-env@latest
96 |     inputs:
97 |       model_name: "taxi-model"
98 |       model_path: ${{parent.jobs.train_model.outputs.model_output}}
99 |       evaluation_output: ${{parent.jobs.evaluate_model.outputs.evaluation_output}}
100 |     outputs:
101 |       model_info_output_path: ${{parent.outputs.model_info_output_path}}
102 | # </jobs>
103 |
--------------------------------------------------------------------------------
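In CI this pipeline is submitted through the run-pipeline template; a minimal equivalent sketch with the azure-ai-ml v2 SDK (workspace coordinates are placeholders) would be:

    from azure.ai.ml import MLClient, load_job
    from azure.identity import DefaultAzureCredential

    ml_client = MLClient(
        DefaultAzureCredential(),
        subscription_id="<subscription-id>",      # placeholder
        resource_group_name="<resource-group>",   # placeholder
        workspace_name="<workspace-name>",        # placeholder
    )

    pipeline_job = load_job("mlops/azureml/train/pipeline.yml")
    run = ml_client.jobs.create_or_update(pipeline_job, experiment_name="taxi-fare-training")
    print(run.studio_url)  # link to the submitted run in the AML studio UI
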
/classical/aml-cli-v2/mlops/azureml/train/train-env.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
2 | name: taxi-train-env
3 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
4 | conda_file: ../../../data-science/environment/train-conda.yml
5 | description: Environment created from a Docker image plus Conda environment to train taxi model.
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-batch-endpoint-pipeline
5 |
6 | variables:
7 |   - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 |       # 'main' branch: PRD environment
9 |       - template: ../../config-infra-prod.yml
10 |   - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 |       # 'develop' or feature branches: DEV environment
12 |       - template: ../../config-infra-dev.yml
13 |   - name: version
14 |     value: aml-cli-v2
15 |   - name: endpoint_name
16 |     value: taxi-batch-$(namespace)$(postfix)$(environment)
17 |   - name: endpoint_type
18 |     value: batch
19 |
20 | trigger: none
21 |
22 | pool:
23 |   vmImage: ubuntu-20.04
24 |
25 | resources:
26 |   repositories:
27 |     - repository: mlops-templates # Template Repo
28 |       name: mlops-templates
29 |       type: git
30 |       ref: main
31 |
32 | stages:
33 |   - stage: CreateBatchEndpoint
34 |     displayName: Create/Update Batch Endpoint
35 |     jobs:
36 |       - job: DeployBatchEndpoint
37 |         steps:
38 |           - checkout: self
39 |             path: s/
40 |           - task: Bash@3
41 |             displayName: "Create checkout repository folder(s)"
42 |             inputs:
43 |               targetType: "inline"
44 |               script: |
45 |                 set -e
46 |                 mkdir "$(Build.Repository.Name)"
47 |                 mkdir "mlops-templates"
48 |           - checkout: mlops-templates
49 |             path: s/templates/
50 |           - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
51 |           - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
52 |           - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
53 |           - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
54 |             parameters:
55 |               cluster_name: batch-cluster # name must match cluster name in deployment file below
56 |               size: STANDARD_DS3_V2
57 |               min_instances: 0
58 |               max_instances: 5
59 |               cluster_tier: dedicated
60 |           - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
61 |             parameters:
62 |               endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
63 |           - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
64 |             parameters:
65 |               deployment_name: taxi-batch-dp
66 |               deployment_file: mlops/azureml/deploy/batch/batch-deployment.yml
67 |           - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
68 |             parameters:
69 |               deployment_name: taxi-batch-dp
70 |               sample_request: data/taxi-batch.csv
71 |               request_type: uri_file # either uri_folder or uri_file
72 |
73 |
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-model-training-pipeline
5 |
6 | variables:
7 |   - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 |       # 'main' branch: PRD environment
9 |       - template: ../../config-infra-prod.yml
10 |   - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 |       # 'develop' or feature branches: DEV environment
12 |       - template: ../../config-infra-dev.yml
13 |   - name: version
14 |     value: aml-cli-v2
15 |
16 | trigger: none
17 |
18 | pool:
19 |   vmImage: ubuntu-20.04
20 |
21 | resources:
22 |   repositories:
23 |     - repository: mlops-templates # Template Repo
24 |       name: mlops-templates
25 |       type: git
26 |       ref: main
27 |
28 | stages:
29 |   - stage: DeployTrainingPipeline
30 |     displayName: Deploy Training Pipeline
31 |     jobs:
32 |       - job: DeployTrainingPipeline
33 |         timeoutInMinutes: 120 # how long to run the job before automatically cancelling
34 |         steps:
35 |           - checkout: self
36 |             path: s/
37 |           - task: Bash@3
38 |             displayName: "Prevent repos dir warnings"
39 |             inputs:
40 |               targetType: "inline"
41 |               script: |
42 |                 set -e
43 |                 mkdir "$(Build.Repository.Name)"
44 |                 mkdir "mlops-templates"
45 |           - checkout: mlops-templates
46 |             path: s/templates/
47 |           - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
48 |           - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
49 |           - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
50 |           - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
51 |             parameters:
52 |               environment_name: taxi-train-env
53 |               environment_file: mlops/azureml/train/train-env.yml
54 |           - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
55 |             parameters:
56 |               cluster_name: cpu-cluster
57 |               size: Standard_DS3_v2
58 |               min_instances: 0
59 |               max_instances: 4
60 |               cluster_tier: low_priority
61 |           - template: templates/${{ variables.version }}/register-data.yml@mlops-templates
62 |             parameters:
63 |               data_type: uri_file
64 |               data_name: taxi-data
65 |               data_file: mlops/azureml/train/data.yml
66 |           - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
67 |             parameters:
68 |               pipeline_file: mlops/azureml/train/pipeline.yml
69 |               experiment_name: $(environment)_taxi_fare_train_$(Build.SourceBranchName)
70 |               display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
71 |               enable_monitoring: $(enable_monitoring)
72 |
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-online-endpoint-pipeline
5 |
6 | variables:
7 |   - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 |       # 'main' branch: PRD environment
9 |       - template: ../../config-infra-prod.yml
10 |   - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 |       # 'develop' or feature branches: DEV environment
12 |       - template: ../../config-infra-dev.yml
13 |   - name: version
14 |     value: aml-cli-v2
15 |   - name: endpoint_name
16 |     value: taxi-online-$(namespace)$(postfix)$(environment)
17 |   - name: endpoint_type
18 |     value: online
19 |
20 | trigger: none
21 |
22 | pool:
23 |   vmImage: ubuntu-20.04
24 |
25 | resources:
26 |   repositories:
27 |     - repository: mlops-templates # Template Repo
28 |       name: mlops-templates
29 |       type: git
30 |       ref: main
31 |
32 | stages:
33 |   - stage: CreateOnlineEndpoint
34 |     displayName: Create/Update Online Endpoint
35 |     jobs:
36 |       - job: DeployOnlineEndpoint
37 |         steps:
38 |           - checkout: self
39 |             path: s/
40 |           - task: Bash@3
41 |             displayName: "Create checkout repository folder(s)"
42 |             inputs:
43 |               targetType: "inline"
44 |               script: |
45 |                 set -e
46 |                 mkdir "$(Build.Repository.Name)"
47 |                 mkdir "mlops-templates"
48 |           - checkout: mlops-templates
49 |             path: s/templates/
50 |           - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
51 |           - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
52 |           - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
53 |           - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
54 |             parameters:
55 |               endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
56 |           - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
57 |             parameters:
58 |               deployment_name: taxi-online-dp
59 |               deployment_file: mlops/azureml/deploy/online/online-deployment.yml
60 |           - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates
61 |             parameters:
62 |               traffic_allocation: taxi-online-dp=100
63 |           - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
64 |             parameters:
65 |               deployment_name: taxi-online-dp
66 |               sample_request: data/taxi-request.json
67 |               request_type: json
68 |
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/github-actions/deploy-batch-endpoint-pipeline-classical.yml:
--------------------------------------------------------------------------------
1 | name: deploy-batch-endpoint-pipeline
2 |
3 | on:
4 |   workflow_dispatch:
5 | jobs:
6 |   set-env-branch:
7 |     runs-on: ubuntu-latest
8 |     outputs:
9 |       config-file: ${{ steps.set-output-defaults.outputs.config-file }}
10 |     steps:
11 |       - id: set-prod-branch
12 |         name: set-prod-branch
13 |         if: ${{ github.ref == 'refs/heads/main'}}
14 |         run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV;
15 |       - id: set-dev-branch
16 |         name: set-dev-branch
17 |         if: ${{ github.ref != 'refs/heads/main'}}
18 |         run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV;
19 |       - id: set-output-defaults
20 |         name: set-output-defaults
21 |         run: |
22 |           echo "config-file=$config_env" >> $GITHUB_OUTPUT;
23 |   get-config:
24 |     needs: set-env-branch
25 |     uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main
26 |     with:
27 |       file_name: ${{ needs.set-env-branch.outputs.config-file}}
28 |   create-compute:
29 |     needs: get-config
30 |     uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main
31 |     with:
32 |       cluster_name: batch-cluster
33 |       size: STANDARD_DS3_V2
34 |       min_instances: 0
35 |       max_instances: 5
36 |       cluster_tier: low_priority
37 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
38 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
39 |     secrets:
40 |       creds: ${{secrets.AZURE_CREDENTIALS}}
41 |   create-endpoint:
42 |     needs: [get-config, create-compute]
43 |     uses: Azure/mlops-templates/.github/workflows/create-endpoint.yml@main
44 |     with:
45 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
46 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
47 |       endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
48 |       endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.bep) }}
49 |       endpoint_type: batch
50 |     secrets:
51 |       creds: ${{secrets.AZURE_CREDENTIALS}}
52 |   create-deployment:
53 |     uses: Azure/mlops-templates/.github/workflows/create-deployment.yml@main
54 |     needs: [get-config, create-endpoint]
55 |     with:
56 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
57 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
58 |       endpoint_file: mlops/azureml/deploy/batch/batch-deployment.yml
59 |       endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.bep) }}
60 |       endpoint_type: batch
61 |       deployment_name: eptestdeploy
62 |     secrets:
63 |       creds: ${{secrets.AZURE_CREDENTIALS}}
64 |
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/github-actions/deploy-model-training-pipeline-classical.yml:
--------------------------------------------------------------------------------
1 | name: deploy-model-training-pipeline
2 |
3 | on:
4 |   workflow_dispatch:
5 | jobs:
6 |   set-env-branch:
7 |     runs-on: ubuntu-latest
8 |     outputs:
9 |       config-file: ${{ steps.set-output-defaults.outputs.config-file }}
10 |     steps:
11 |       - id: set-prod-branch
12 |         name: set-prod-branch
13 |         if: ${{ github.ref == 'refs/heads/main'}}
14 |         run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV;
15 |       - id: set-dev-branch
16 |         name: set-dev-branch
17 |         if: ${{ github.ref != 'refs/heads/main'}}
18 |         run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV;
19 |       - id: set-output-defaults
20 |         name: set-output-defaults
21 |         run: |
22 |           echo "config-file=$config_env" >> $GITHUB_OUTPUT;
23 |   get-config:
24 |     needs: set-env-branch
25 |     uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main
26 |     with:
27 |       file_name: ${{ needs.set-env-branch.outputs.config-file}}
28 |   register-environment:
29 |     needs: get-config
30 |     uses: Azure/mlops-templates/.github/workflows/register-environment.yml@main
31 |     with:
32 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
33 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
34 |       environment_file: mlops/azureml/train/train-env.yml
35 |       conda_file: data-science/environment/train-conda.yml
36 |     secrets:
37 |       creds: ${{secrets.AZURE_CREDENTIALS}}
38 |   register-dataset:
39 |     needs: get-config
40 |     uses: Azure/mlops-templates/.github/workflows/register-dataset.yml@main
41 |     with:
42 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
43 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
44 |       name: taxi-data
45 |       data_file: mlops/azureml/train/data.yml
46 |     secrets:
47 |       creds: ${{secrets.AZURE_CREDENTIALS}}
48 |   create-compute:
49 |     needs: [get-config]
50 |     uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main
51 |     with:
52 |       cluster_name: cpu-cluster
53 |       size: Standard_DS3_v2
54 |       min_instances: 0
55 |       max_instances: 4
56 |       cluster_tier: low_priority
57 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
58 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
59 |     secrets:
60 |       creds: ${{secrets.AZURE_CREDENTIALS}}
61 |   run-model-training-pipeline:
62 |     needs: [get-config, register-environment, register-dataset, create-compute]
63 |     uses: Azure/mlops-templates/.github/workflows/run-pipeline.yml@main
64 |     with:
65 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
66 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
67 |       parameters-file: mlops/azureml/train/pipeline.yml
68 |       job-name: test
69 |     secrets:
70 |       creds: ${{secrets.AZURE_CREDENTIALS}}
71 |
--------------------------------------------------------------------------------
/classical/aml-cli-v2/mlops/github-actions/deploy-online-endpoint-pipeline-classical.yml:
--------------------------------------------------------------------------------
1 | name: deploy-online-endpoint-pipeline
2 |
3 | on:
4 |   workflow_dispatch:
5 | jobs:
6 |   set-env-branch:
7 |     runs-on: ubuntu-latest
8 |     outputs:
9 |       config-file: ${{ steps.set-output-defaults.outputs.config-file }}
10 |     steps:
11 |       - id: set-prod-branch
12 |         name: set-prod-branch
13 |         if: ${{ github.ref == 'refs/heads/main'}}
14 |         run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV;
15 |       - id: set-dev-branch
16 |         name: set-dev-branch
17 |         if: ${{ github.ref != 'refs/heads/main'}}
18 |         run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV;
19 |       - id: set-output-defaults
20 |         name: set-output-defaults
21 |         run: |
22 |           echo "config-file=$config_env" >> $GITHUB_OUTPUT;
23 |   get-config:
24 |     needs: set-env-branch
25 |     uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main
26 |     with:
27 |       file_name: ${{ needs.set-env-branch.outputs.config-file}}
28 |   create-endpoint:
29 |     needs: get-config
30 |     uses: Azure/mlops-templates/.github/workflows/create-endpoint.yml@main
31 |     with:
32 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
33 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
34 |       endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
35 |       endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }}
36 |       endpoint_type: online
37 |     secrets:
38 |       creds: ${{secrets.AZURE_CREDENTIALS}}
39 |   create-deployment:
40 |     uses: Azure/mlops-templates/.github/workflows/create-deployment.yml@main
41 |     needs: [get-config, create-endpoint]
42 |     with:
43 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
44 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
45 |       endpoint_file: mlops/azureml/deploy/online/online-deployment.yml
46 |       endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }}
47 |       endpoint_type: online
48 |       deployment_name: taxi-online-dp
49 |     secrets:
50 |       creds: ${{secrets.AZURE_CREDENTIALS}}
51 |   allocate-traffic:
52 |     uses: Azure/mlops-templates/.github/workflows/allocate-traffic.yml@main
53 |     needs: [get-config, create-deployment]
54 |     with:
55 |       resource_group: ${{ needs.get-config.outputs.resource_group }}
56 |       workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
57 |       traffic_allocation: taxi-online-dp=100
58 |       endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }}
59 |     secrets:
60 |       creds: ${{secrets.AZURE_CREDENTIALS}}
61 |
--------------------------------------------------------------------------------
/classical/python-sdk-v1/config-aml.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | variables:
5 |
6 |   ap_vm_image: ubuntu-20.04
7 |
8 |   # Training pipeline settings
9 |
10 |   # Training dataset settings
11 |   training_dataset_name: uci-credit
12 |   training_dataset_description: uci_credit
13 |   training_dataset_local_path: data/training/
14 |   training_dataset_path_on_datastore: data/training/
15 |   training_dataset_type: local
16 |   training_dataset_storage_url: 'https://azureaidemostorage.blob.core.windows.net/data/'
17 |
18 |   # Training AzureML Environment name
19 |   training_env_name: credit-training
20 |
21 |   # Training AzureML Environment conda yaml
22 |   training_env_conda_yaml: data-science/environment/train.yml
23 |
24 |   # Name for the training pipeline
25 |   training_pipeline_name: credit-training
26 |
27 |   # Compute target for pipeline
28 |   training_target: cpu-cluster
29 |   training_target_sku: STANDARD_D2_V2
30 |   training_target_min_nodes: 0
31 |   training_target_max_nodes: 4
32 |
33 |   # Training arguments specification
34 |   training_arguments: ''
35 |
36 |   # Training datasets specification
37 |   # Syntax: <name>:<version>:<mode>:<step>
38 |   training_datasets: uci-credit:1:download:prep
39 |
40 |   # Name under which the model will be registered
41 |   model_name: credit-ci
42 |
43 |   # Batch pipeline settings
44 |
45 |   # Batch scoring dataset settings
46 |   scoring_dataset_name: credit-batch-input
47 |   scoring_dataset_description: credit-batch-input
48 |   scoring_dataset_local_path: data/scoring/
49 |   scoring_dataset_path_on_datastore: data/scoring/
50 |   scoring_dataset_type: local
51 |   scoring_dataset_storage_url: 'https://azureaidemostorage.blob.core.windows.net/data/'
52 |
53 |   # Batch AzureML Environment name
54 |   batch_env_name: credit-batch
55 |
56 |   # Batch AzureML Environment conda yaml
57 |   batch_env_conda_yaml: data-science/environment/batch.yml
58 |
59 |   # Name for the batch scoring pipeline
60 |   batch_pipeline_name: credit-batch-scoring
61 |
62 |   # Compute target for pipeline
63 |   batch_target: cpu-cluster
64 |   # not needed because batch uses the same target as training
65 |   # batch_target_sku: STANDARD_D2_V2
66 |   # batch_target_min_nodes: 0
67 |   # batch_target_max_nodes: 4
68 |
69 |   # Input batch dataset
70 |   batch_input_dataset_name: credit-batch-input
71 |
72 |   # Output dataset with results
73 |   batch_output_dataset_name: credit-batch-output
74 |   batch_output_path_on_datastore: credit-batch-scoring-results/{run-id}
75 |   batch_output_filename: results.csv
76 |
77 |   # Parallelization settings
78 |   batch_mini_batch_size: 8
79 |   batch_error_threshold: 1
80 |   batch_process_count_per_node: 1
81 |   batch_node_count: 1
82 |
83 |   # Monitoring settings
84 |   scoring_table_name: scoringdata
85 |   training_table_name: mlmonitoring
86 |
87 |
--------------------------------------------------------------------------------
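The training_datasets value above is a colon-separated spec. A hypothetical helper, only to illustrate the convention (the field names are assumptions inferred from the example "uci-credit:1:download:prep"):

    def parse_dataset_spec(spec: str) -> dict:
        # Assumed layout: "<name>:<version>:<mode>:<step>"
        name, version, mode, step = spec.split(":")
        return {"name": name, "version": version, "mode": mode, "step": step}

    print(parse_dataset_spec("uci-credit:1:download:prep"))
    # {'name': 'uci-credit', 'version': '1', 'mode': 'download', 'step': 'prep'}
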
/classical/python-sdk-v1/data-science/environment/batch.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: mnist-batch
5 | channels:
6 |   - defaults
7 |   - anaconda
8 |   - conda-forge
9 | dependencies:
10 |   - python=3.7.5
11 |   - pip
12 |   - pip:
13 |       - azureml-defaults==1.38.0
14 |       - azureml-mlflow==1.38.0
15 |       - azureml-sdk==1.38.0
16 |       - azureml-interpret==1.38.0
17 |       - scikit-learn==0.24.1
18 |       - pandas==1.2.1
19 |       - joblib==1.0.0
20 |       - matplotlib==3.3.3
--------------------------------------------------------------------------------
/classical/python-sdk-v1/data-science/environment/batch_monitor.yml:
--------------------------------------------------------------------------------
1 | name: batch-monitoring
2 | channels:
3 | - defaults
4 | - anaconda
5 | - conda-forge
6 | dependencies:
7 | - python=3.7.5
8 | - pip
9 | - pip:
10 | - azureml-defaults==1.38.0
11 | - azureml-mlflow==1.38.0
12 | - azureml-sdk==1.38.0
13 | - azureml-interpret==1.38.0
14 | - scikit-learn==0.24.1
15 | - pandas==1.2.1
16 | - joblib==1.0.0
17 | - matplotlib==3.3.3
18 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
19 |
--------------------------------------------------------------------------------
/classical/python-sdk-v1/data-science/environment/train.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: mnist-train
5 | channels:
6 | - defaults
7 | - anaconda
8 | - conda-forge
9 | dependencies:
10 | - python=3.7.5
11 | - pip
12 | - pip:
13 | - azureml-mlflow==1.38.0
14 | - azureml-sdk==1.38.0
15 | - scikit-learn==0.24.1
16 | - pandas==1.2.1
17 | - joblib==1.0.0
18 | - matplotlib==3.3.3
19 | - fairlearn==0.7.0
20 | - azureml-contrib-fairness==1.38.0
21 | - interpret-community==0.24.1
22 | - interpret-core==0.2.7
23 | - azureml-interpret==1.38.0
--------------------------------------------------------------------------------
/classical/python-sdk-v1/data-science/environment/train_monitor.yml:
--------------------------------------------------------------------------------
1 | name: train
2 | channels:
3 | - defaults
4 | - anaconda
5 | - conda-forge
6 | dependencies:
7 | - python=3.7.5
8 | - pip
9 | - pip:
10 | - azureml-mlflow==1.38.0
11 | - azureml-sdk==1.38.0
12 | - scikit-learn==0.24.1
13 | - pandas==1.2.1
14 | - joblib==1.0.0
15 | - matplotlib==3.3.3
16 | - fairlearn==0.7.0
17 | - azureml-contrib-fairness==1.38.0
18 | - interpret-community==0.24.1
19 | - interpret-core==0.2.7
20 | - azureml-interpret==1.38.0
21 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
22 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
--------------------------------------------------------------------------------
/classical/python-sdk-v1/data-science/notebooks/experiment1.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/classical/python-sdk-v1/data-science/notebooks/experiment1.ipynb
--------------------------------------------------------------------------------
/classical/python-sdk-v1/data-science/src/prep.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | import os
5 | import sys
6 | import argparse
7 | import joblib
8 | import pandas as pd
9 | import numpy as np
10 |
11 | import mlflow
12 | import mlflow.sklearn
13 |
14 | from azureml.core import Run
15 |
16 |
17 |
18 | run = Run.get_context()
19 | ws = run.experiment.workspace
20 |
21 | def parse_args():
22 | parser = argparse.ArgumentParser(description="UCI Credit example")
23 | parser.add_argument("--uci-credit", type=str, default='data/', help="Directory path to training data")
24 | parser.add_argument("--prepared_data_path", type=str, default='prepared_data/', help="prepared data directory")
25 | parser.add_argument("--enable_monitoring", type=str, default="false", help="enable logging to ADX")
26 | parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")
27 | return parser.parse_known_args()
28 |
29 | def log_training_data(df, table_name):
30 | from obs.collector import Online_Collector
31 | from datetime import timedelta
32 | print("If there is an Authorization error, check your Azure KeyVault secret named kvmonitoringspkey. Terraform might put single quotation marks around the secret. Remove the single quotes and the secret should work.")
33 | collector = Online_Collector(table_name)
34 | df["timestamp"] = [pd.to_datetime('now') - timedelta(days=x) for x in range(len(df))]
35 | collector.batch_collect(df)
36 |
37 |
38 | def main():
39 | # Parse command-line arguments
40 | args, unknown = parse_args()
41 | prepared_data_path = args.prepared_data_path
42 |
43 | # Make sure data output path exists
44 | if not os.path.exists(prepared_data_path):
45 | os.makedirs(prepared_data_path)
46 |
47 | # Enable auto logging
48 | mlflow.sklearn.autolog()
49 |
50 | # Read training data
51 | df = pd.read_csv(os.path.join(args.uci_credit, 'credit.csv'))
52 |
53 | random_data = np.random.rand(len(df))
54 |
55 | msk_train = random_data < 0.7
56 | msk_val = (random_data >= 0.7) & (random_data < 0.85)
57 | msk_test = random_data >= 0.85
58 |
59 | train = df[msk_train]
60 | val = df[msk_val]
61 | test = df[msk_test]
62 |
63 | run.log('TRAIN SIZE', train.shape[0])
64 | run.log('VAL SIZE', val.shape[0])
65 | run.log('TEST SIZE', test.shape[0])
66 |
67 | run.parent.log('TRAIN SIZE', train.shape[0])
68 | run.parent.log('VAL SIZE', val.shape[0])
69 | run.parent.log('TEST SIZE', test.shape[0])
70 |
71 | TRAIN_PATH = os.path.join(prepared_data_path, "train.csv")
72 | VAL_PATH = os.path.join(prepared_data_path, "val.csv")
73 | TEST_PATH = os.path.join(prepared_data_path, "test.csv")
74 |
75 | train.to_csv(TRAIN_PATH, index=False)
76 | val.to_csv(VAL_PATH, index=False)
77 | test.to_csv(TEST_PATH, index=False)
78 |
79 | if (args.enable_monitoring.lower() == 'true' or args.enable_monitoring == '1' or args.enable_monitoring.lower() == 'yes'):
80 | log_training_data(df, args.table_name)
81 |
82 | if __name__ == '__main__':
83 | main()
84 |
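
The script treats "true", "1", and "yes" (case-insensitively) as truthy values for --enable_monitoring. A sketch of folding that repeated comparison into a single helper (hypothetical; the original keeps the inline checks):

```python
# Hypothetical helper: one place to normalize the --enable_monitoring string.
def str2bool(value: str) -> bool:
    return str(value).strip().lower() in ("true", "1", "yes")

assert str2bool("True") and str2bool("1") and not str2bool("false")
```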
--------------------------------------------------------------------------------
/classical/python-sdk-v1/data-science/src/score.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | import os
5 | import glob
6 | import json
7 | import argparse
8 | import numpy as np
9 | import pandas as pd
10 | import joblib
11 | from datetime import timedelta
12 | from azureml.core.model import Model
13 |
14 | model = None
15 | explainer = None
16 | collector = None
17 |
18 |
19 | def init():
20 | global model, explainer, collector
21 | print("Started batch scoring by running init()")
22 |
23 | parser = argparse.ArgumentParser("batch_scoring")
24 | parser.add_argument("--model_name", type=str, help="Model to use for batch scoring")
25 | parser.add_argument(
26 | "--enable_monitoring", type=str, help="Enable Monitoring", default="false"
27 | )
28 | parser.add_argument("--table_name", type=str, help="Table Name for logging data")
29 | args, _ = parser.parse_known_args()
30 |
31 | model_path = Model.get_model_path(args.model_name)
32 | print(f"Model path: {model_path}")
33 |
34 | if "model.pkl" in model_path:
35 | model = joblib.load(model_path)
36 | else:
37 | model = joblib.load(os.path.join(model_path, "model.pkl"))
38 |
39 | # load the explainer
40 | explainer_path = os.path.join(Model.get_model_path(args.model_name), "explainer")
41 | # explainer = joblib.load(explainer_path)
42 |
43 | if (
44 | args.enable_monitoring.lower() == "true"
45 | or args.enable_monitoring == "1"
46 | or args.enable_monitoring.lower() == "yes"
47 | ):
48 | from obs.collector import Online_Collector
49 |
50 | collector = Online_Collector(args.table_name)
51 |
52 |
53 | def run(file_list):
54 |
55 | print(f"Files to process: {file_list}")
56 | results = pd.DataFrame(
57 | columns=["Sno", "ProbaGoodCredit", "ProbaBadCredit", "FeatureImportance"]
58 | )
59 | all_results = []
60 | for filename in file_list:
61 |
62 | df = pd.read_csv(filename)
63 | sno = df["Sno"]
64 | df = df.drop("Sno", axis=1)
65 |
66 | proba = model.predict_proba(df)
67 | proba = pd.DataFrame(data=proba, columns=["ProbaGoodCredit", "ProbaBadCredit"])
68 |
69 | # explanation = explainer.explain_local(df)
70 | # sorted feature importance values and feature names
71 | # sorted_local_importance_names = explanation.get_ranked_local_names()
72 | # sorted_local_importance_values = explanation.get_ranked_local_values()
73 |         # get explanations as a dictionary
74 | # explanations = []
75 | # for i, j in zip(sorted_local_importance_names[0], sorted_local_importance_values[0]):
76 | # explanations.append(dict(zip(i, j)))
77 | # explanation = pd.DataFrame(data=explanations, columns=["FeatureImportance"])
78 |
79 | # result = pd.concat([sno, proba, explanation], axis=1)
80 | result = pd.concat([sno, proba], axis=1)
81 |         results = pd.concat([results, result])
82 | all_results.append(pd.concat([df, proba], axis=1))
83 | print(f"Batch scored: {filename}")
84 |
85 | if collector:
86 | full_results = pd.concat(all_results)
87 | full_results["timestamp"] = [
88 | pd.to_datetime("now") - timedelta(days=x) for x in range(len(full_results))
89 | ]
90 | collector.batch_collect(full_results)
91 |
92 | return results
93 |
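
The init()/run() pair is the contract AzureML's ParallelRunStep expects from a batch entry script. A local smoke test under stated assumptions (the file is importable as `score`, azureml-core is installed since the module imports it, and a stub stands in for the registered model so no workspace is needed):

```python
# Sketch: exercise run() locally with a stub model. Not part of the deployment.
import numpy as np
import pandas as pd
import score  # assumes this file is on the path as score.py

class StubModel:
    def predict_proba(self, X):
        return np.tile([0.7, 0.3], (len(X), 1))  # P(good), P(bad) per row

score.model = StubModel()  # bypass init(), which needs Model.get_model_path
pd.DataFrame({"Sno": [1, 2], "feat": [0.1, 0.2]}).to_csv("mini.csv", index=False)
print(score.run(["mini.csv"])[["Sno", "ProbaGoodCredit", "ProbaBadCredit"]])
```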
--------------------------------------------------------------------------------
/classical/python-sdk-v1/data-science/tests/test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/classical/python-sdk-v1/data-science/tests/test.py
--------------------------------------------------------------------------------
/classical/python-sdk-v1/mlops/devops-pipelines/deploy-drift-detection.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | variables:
5 | - template: ../../config-aml.yml
6 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
7 | # 'main' branch: PRD environment
8 | - template: ../../config-infra-prod.yml
9 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
10 | # 'develop' or feature branches: DEV environment
11 | - template: ../../config-infra-dev.yml
12 | - name: version
13 | value: python-sdk-v1
14 |
15 | trigger: none
16 |
17 | resources:
18 |   repositories:
19 |     - repository: mlops-templates # Template Repo
20 |       name: mlops-templates
21 |       type: git
22 |       ref: main
23 |
24 | pool:
25 |   vmImage: $(ap_vm_image)
26 |
27 | stages:
28 | - stage: DeployDriftJob
29 |   displayName: Deploy Drift Job
30 |   jobs:
31 |   - job: DeployDriftJob
32 |     steps:
33 |       - checkout: self
34 |         path: s/
35 |       - checkout: mlops-templates
36 |         path: s/templates/
37 |       - template: templates/${{ variables.version }}/deploy-drift-detection.yml@mlops-templates
--------------------------------------------------------------------------------
/classical/python-sdk-v1/mlops/devops-pipelines/deploy-model-batch-scoring.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | variables:
5 | - template: ../../config-aml.yml
6 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
7 | # 'main' branch: PRD environment
8 | - template: ../../config-infra-prod.yml
9 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
10 | # 'develop' or feature branches: DEV environment
11 | - template: ../../config-infra-dev.yml
12 | - name: version
13 | value: python-sdk-v1
14 |
15 | trigger: none
16 |
17 | pool:
18 | vmImage: $(ap_vm_image)
19 |
20 | resources:
21 | repositories:
22 | - repository: mlops-templates # Template Repo
23 | name: mlops-templates
24 | type: git
25 | ref: main
26 |
27 | stages:
28 | - stage: DeployBatchScoringPipeline
29 | displayName: Deploy Batch Scoring Pipeline
30 | jobs:
31 | - job: DeployBatchScoringPipeline
32 | steps:
33 | - checkout: self
34 | path: s/
35 | - task: Bash@3
36 | displayName: "Create checkout repository folder(s)"
37 | inputs:
38 | targetType: "inline"
39 | script: |
40 | set -e
41 | mkdir "$(Build.Repository.Name)"
42 | mkdir "mlops-templates"
43 | - checkout: mlops-templates
44 | path: s/templates/
45 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
46 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
47 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
48 | - template: templates/${{ variables.version }}/create-environment.yml@mlops-templates
49 | parameters:
50 | environment_name: $(batch_env_name)
51 | build_type: "conda"
52 | environment_file: $(batch_env_conda_yaml)
53 | enable_monitoring: $(enable_monitoring)
54 | - template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates
55 | parameters:
56 | data_type: scoring
57 | - template: templates/${{ variables.version }}/deploy-batch-scoring-pipeline.yml@mlops-templates
58 | parameters:
59 | enable_monitoring: $(enable_monitoring)
60 | - template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates
61 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
62 |
--------------------------------------------------------------------------------
/classical/python-sdk-v1/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-model-training-pipeline
5 |
6 | variables:
7 | - template: ../../config-aml.yml
8 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
9 | # 'main' branch: PRD environment
10 | - template: ../../config-infra-prod.yml
11 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
12 | # 'develop' or feature branches: DEV environment
13 | - template: ../../config-infra-dev.yml
14 | - name: version
15 | value: python-sdk-v1
16 |
17 | trigger: none
18 |
19 | pool:
20 | vmImage: $(ap_vm_image)
21 |
22 | resources:
23 | repositories:
24 | - repository: mlops-templates # Template Repo
25 | name: mlops-templates
26 | type: git
27 | ref: main
28 |
29 | stages:
30 | - stage: DeployTrainingPipeline
31 | displayName: Deploy Training Pipeline
32 | jobs:
33 | - job: DeployTrainingPipeline
34 | steps:
35 | - checkout: self
36 | path: s/
37 | - task: Bash@3
38 | displayName: "Create checkout repository folder(s)"
39 | inputs:
40 | targetType: "inline"
41 | script: |
42 | set -e
43 | mkdir "$(Build.Repository.Name)"
44 | mkdir "mlops-templates"
45 | - checkout: mlops-templates
46 | path: s/templates/
47 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
48 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
49 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
50 | - template: templates/${{ variables.version }}/create-environment.yml@mlops-templates
51 | parameters:
52 | environment_name: $(training_env_name)
53 | build_type: "conda"
54 | environment_file: $(training_env_conda_yaml)
55 | enable_monitoring: $(enable_monitoring)
56 | - template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates
57 | parameters:
58 | data_type: training
59 | - template: templates/${{ variables.version }}/get-compute.yml@mlops-templates
60 | parameters:
61 | compute_type: training
62 | - template: templates/${{ variables.version }}/deploy-training-pipeline.yml@mlops-templates
63 | parameters:
64 | enable_monitoring: $(enable_monitoring)
65 | - template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates
66 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
67 |
--------------------------------------------------------------------------------
/classical/python-sdk-v2/data-science/environment/train-conda.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - defaults
3 | - anaconda
4 | - conda-forge
5 | dependencies:
6 | - python=3.7.5
7 | - pip
8 | - pip:
9 | - azureml-mlflow==1.38.0
10 | - azure-ai-ml==1.0.0
11 | - pyarrow==10.0.0
12 | - scikit-learn==0.24.1
13 | - pandas==1.2.1
14 | - joblib==1.0.0
15 | - matplotlib==3.3.3
16 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
17 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
--------------------------------------------------------------------------------
/classical/python-sdk-v2/data-science/src/prep/prep.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 | """
4 | Prepares raw data and provides training, validation and test datasets
5 | """
6 |
7 | import argparse
8 |
9 | from pathlib import Path
10 | import os
11 | import numpy as np
12 | import pandas as pd
13 |
14 | import mlflow
15 |
16 | TARGET_COL = "cost"
17 |
18 | NUMERIC_COLS = [
19 | "distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
20 | "pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
21 | "pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
22 | "dropoff_hour", "dropoff_minute", "dropoff_second"
23 | ]
24 |
25 | CAT_NOM_COLS = [
26 | "store_forward", "vendor"
27 | ]
28 |
29 | CAT_ORD_COLS = [
30 | ]
31 |
32 | def parse_args():
33 | '''Parse input arguments'''
34 |
35 | parser = argparse.ArgumentParser("prep")
36 | parser.add_argument("--raw_data", type=str, help="Path to raw data")
37 | parser.add_argument("--train_data", type=str, help="Path to train dataset")
38 |     parser.add_argument("--val_data", type=str, help="Path to val dataset")
39 |     parser.add_argument("--test_data", type=str, help="Path to test dataset")
40 |
41 |     parser.add_argument("--enable_monitoring", type=str, default="false", help="enable logging to ADX")
42 | parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")
43 |
44 | args = parser.parse_args()
45 |
46 | return args
47 |
48 | def log_training_data(df, table_name):
49 | from obs.collector import Online_Collector
50 | collector = Online_Collector(table_name)
51 | collector.batch_collect(df)
52 |
53 | def main(args):
54 | '''Read, split, and save datasets'''
55 |
56 | # ------------ Reading Data ------------ #
57 | # -------------------------------------- #
58 |
59 | data = pd.read_csv((Path(args.raw_data)))
60 | data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]]
61 |
62 | # ------------- Split Data ------------- #
63 | # -------------------------------------- #
64 |
65 | # Split data into train, val and test datasets
66 |
67 | random_data = np.random.rand(len(data))
68 |
69 | msk_train = random_data < 0.7
70 | msk_val = (random_data >= 0.7) & (random_data < 0.85)
71 | msk_test = random_data >= 0.85
72 |
73 | train = data[msk_train]
74 | val = data[msk_val]
75 | test = data[msk_test]
76 |
77 | mlflow.log_metric('train size', train.shape[0])
78 | mlflow.log_metric('val size', val.shape[0])
79 | mlflow.log_metric('test size', test.shape[0])
80 |
81 | train.to_parquet((Path(args.train_data) / "train.parquet"))
82 | val.to_parquet((Path(args.val_data) / "val.parquet"))
83 | test.to_parquet((Path(args.test_data) / "test.parquet"))
84 |
85 |     if (args.enable_monitoring.lower() == 'true' or args.enable_monitoring == '1' or args.enable_monitoring.lower() == 'yes'):
86 | log_training_data(data, args.table_name)
87 |
88 |
89 | if __name__ == "__main__":
90 |
91 | mlflow.start_run()
92 |
93 | # ---------- Parse Arguments ----------- #
94 | # -------------------------------------- #
95 |
96 | args = parse_args()
97 |
98 | lines = [
99 | f"Raw data path: {args.raw_data}",
100 | f"Train dataset output path: {args.train_data}",
101 | f"Val dataset output path: {args.val_data}",
102 | f"Test dataset path: {args.test_data}",
103 |
104 | ]
105 |
106 | for line in lines:
107 | print(line)
108 |
109 | main(args)
110 |
111 | mlflow.end_run()
112 |
113 |
114 |
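
Note that the random mask above is unseeded, so each run of the prep step produces a different 70/15/15 partition. A seeded sketch of the same split (the seed value is an assumption; the pipeline does not set one):

```python
# Sketch: the same mask-based split as main(), made reproducible with a seed.
import numpy as np
import pandas as pd

def split(data: pd.DataFrame, seed: int = 42):
    r = np.random.default_rng(seed).random(len(data))
    return data[r < 0.7], data[(r >= 0.7) & (r < 0.85)], data[r >= 0.85]

train, val, test = split(pd.DataFrame({"x": range(100)}))
print(len(train), len(val), len(test))  # roughly 70 / 15 / 15
```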
--------------------------------------------------------------------------------
/classical/python-sdk-v2/data-science/src/register/register.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 | """
4 | Registers trained ML model if deploy flag is True.
5 | """
6 |
7 | import argparse
8 | from pathlib import Path
9 | import pickle
10 | import mlflow
11 |
12 | import os
13 | import json
14 |
15 | def parse_args():
16 | '''Parse input arguments'''
17 |
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--model_name', type=str, help='Name under which model will be registered')
20 | parser.add_argument('--model_path', type=str, help='Model directory')
21 | parser.add_argument('--evaluation_output', type=str, help='Path of eval results')
22 | parser.add_argument(
23 | "--model_info_output_path", type=str, help="Path to write model info JSON"
24 | )
25 | args, _ = parser.parse_known_args()
26 | print(f'Arguments: {args}')
27 |
28 | return args
29 |
30 |
31 | def main(args):
32 |     '''Loads model, registers it if deploy flag is True'''
33 |
34 | with open((Path(args.evaluation_output) / "deploy_flag"), 'rb') as infile:
35 | deploy_flag = int(infile.read())
36 |
37 | mlflow.log_metric("deploy flag", int(deploy_flag))
38 |
39 |     if deploy_flag == 1:
40 |
41 | print("Registering ", args.model_name)
42 |
43 | # load model
44 | model = mlflow.sklearn.load_model(args.model_path)
45 |
46 | # log model using mlflow
47 | mlflow.sklearn.log_model(model, args.model_name)
48 |
49 | # register logged model using mlflow
50 | run_id = mlflow.active_run().info.run_id
51 | model_uri = f'runs:/{run_id}/{args.model_name}'
52 | mlflow_model = mlflow.register_model(model_uri, args.model_name)
53 | model_version = mlflow_model.version
54 |
55 | # write model info
56 | print("Writing JSON")
57 |         model_info = {"id": "{0}:{1}".format(args.model_name, model_version)}
58 |         output_path = os.path.join(args.model_info_output_path, "model_info.json")
59 |         with open(output_path, "w") as of:
60 |             json.dump(model_info, fp=of)
61 |
62 | else:
63 | print("Model will not be registered!")
64 |
65 | if __name__ == "__main__":
66 |
67 | mlflow.start_run()
68 |
69 | # ---------- Parse Arguments ----------- #
70 | # -------------------------------------- #
71 |
72 | args = parse_args()
73 |
74 | lines = [
75 | f"Model name: {args.model_name}",
76 | f"Model path: {args.model_path}",
77 | f"Evaluation output path: {args.evaluation_output}",
78 | ]
79 |
80 | for line in lines:
81 | print(line)
82 |
83 | main(args)
84 |
85 | mlflow.end_run()
86 |
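
The registration path above is MLflow's standard log-then-register flow: log the model as a run artifact, then register the runs:/ URI. The same flow in miniature against a local registry (the sqlite URI and model name are assumptions for a standalone run):

```python
# Sketch: log a model to a run, then register that run's artifact by URI.
import mlflow
from sklearn.linear_model import LinearRegression

mlflow.set_tracking_uri("sqlite:///mlruns.db")  # local registry backend
with mlflow.start_run() as run:
    mlflow.sklearn.log_model(LinearRegression(), "demo-model")
    version = mlflow.register_model(
        f"runs:/{run.info.run_id}/demo-model", "demo-model").version
print(f"registered demo-model version {version}")
```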
--------------------------------------------------------------------------------
/classical/python-sdk-v2/data/taxi-request.json:
--------------------------------------------------------------------------------
1 | {"input_data": [[2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57],
2 | [3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35]]
3 | }
--------------------------------------------------------------------------------
/classical/python-sdk-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-batch-endpoint-pipeline
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 | - name: version
14 | value: python-sdk-v2
15 | - name: endpoint_name
16 | value: taxi-batch-$(namespace)$(postfix)$(environment)
17 |
18 | trigger: none
19 |
20 | pool:
21 | vmImage: ubuntu-20.04
22 |
23 | resources:
24 | repositories:
25 | - repository: mlops-templates # Template Repo
26 | name: mlops-templates
27 | type: git
28 | ref: main
29 |
30 | stages:
31 | - stage: CreateBatchEndpoint
32 | displayName: Create/Update Batch Endpoint
33 | jobs:
34 | - job: DeployBatchEndpoint
35 | steps:
36 | - checkout: self
37 | path: s/
38 | - task: Bash@3
39 | displayName: "Create checkout repository folder(s)"
40 | inputs:
41 | targetType: "inline"
42 | script: |
43 | set -e
44 | mkdir "$(Build.Repository.Name)"
45 | mkdir "mlops-templates"
46 | - checkout: mlops-templates
47 | path: s/templates/
48 | - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
49 | - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
50 | - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
51 | - template: templates/aml-cli-v2/create-compute.yml@mlops-templates
52 | parameters:
53 | cluster_name: batch-cluster # name must match cluster name in deployment file below
54 | size: STANDARD_DS3_V2
55 | min_instances: 0
56 | max_instances: 5
57 | cluster_tier: dedicated
58 | - template: templates/${{ variables.version }}/create-batch-endpoint.yml@mlops-templates
59 | parameters:
60 | endpoint_name: "${{ variables.endpoint_name }}"
61 | endpoint_description: "Taxi batch endpoint"
62 | auth_mode: "aad_token"
63 | - template: templates/${{ variables.version }}/create-batch-deployment.yml@mlops-templates
64 | parameters:
65 | deployment_name: taxi-batch-dp
66 | deployment_description: "Taxi batch deployment"
67 | endpoint_name: "${{ variables.endpoint_name }}"
68 | model_path: "taxi-model@latest"
69 | compute: batch-cluster
70 | - template: templates/${{ variables.version }}/test-batch-endpoint.yml@mlops-templates
71 | parameters:
72 | endpoint_name: "${{ variables.endpoint_name }}"
73 | sample_request: data/taxi-batch.csv
74 | request_type: uri_file #either uri_folder or uri_file
75 |
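
The create-batch-endpoint template lives in the separate mlops-templates repo; a rough azure-ai-ml (SDK v2) equivalent of that step is sketched below. The angle-bracket placeholders and endpoint name are assumptions, and the template's actual implementation may differ:

```python
# Sketch: create/update a batch endpoint with the v2 SDK.
from azure.ai.ml import MLClient
from azure.ai.ml.entities import BatchEndpoint
from azure.identity import DefaultAzureCredential

ml_client = MLClient(DefaultAzureCredential(),
                     subscription_id="<sub-id>",
                     resource_group_name="<rg>",
                     workspace_name="<workspace>")
endpoint = BatchEndpoint(name="taxi-batch-demo", description="Taxi batch endpoint")
ml_client.batch_endpoints.begin_create_or_update(endpoint).result()
```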
--------------------------------------------------------------------------------
/classical/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-model-training-pipeline
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 | - name: version
14 | value: python-sdk-v2
15 |
16 | trigger: none
17 |
18 | pool:
19 | vmImage: ubuntu-20.04
20 |
21 | resources:
22 | repositories:
23 | - repository: mlops-templates # Template Repo
24 | name: mlops-templates
25 | type: git
26 | ref: main
27 |
28 | stages:
29 | - stage: DeployTrainingPipeline
30 | displayName: Deploy Training Pipeline
31 | jobs:
32 | - job: DeployTrainingPipeline
33 | timeoutInMinutes: 120 # how long to run the job before automatically cancelling
34 | steps:
35 | - checkout: self
36 | path: s/
37 | - task: Bash@3
38 | displayName: "Create checkout repository folder(s)"
39 | inputs:
40 | targetType: "inline"
41 | script: |
42 | set -e
43 | mkdir "$(Build.Repository.Name)"
44 | mkdir "mlops-templates"
45 | - checkout: mlops-templates
46 | path: s/templates/
47 | - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
48 | - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
49 | - template: templates/python-sdk-v2/install-requirements.yml@mlops-templates
50 | - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
51 | - template: templates/aml-cli-v2/create-compute.yml@mlops-templates
52 | parameters:
53 | cluster_name: cpu-cluster
54 | size: Standard_DS3_v2
55 | min_instances: 0
56 | max_instances: 4
57 | cluster_tier: low_priority
58 | - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
59 | parameters:
60 | environment_name: taxi-train-env
61 | environment_description: "Training Environment for Taxi Pipeline"
62 | environment_path: data-science/environment/train-conda.yml
63 | build_type: conda
64 | - template: templates/${{ variables.version }}/register-data-asset.yml@mlops-templates
65 | parameters:
66 | data_name: taxi-data
67 | data_description: taxi-training-dataset
68 | data_path: data/taxi-data.csv
69 | data_type: uri_file
70 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
71 | parameters:
72 | pipeline_path: mlops/azureml/train/run_pipeline.py
73 | experiment_name: taxi-train-pipeline
74 | data_name: taxi-data
75 | environment_name: taxi-train-env
76 | compute_name: cpu-cluster
77 | enable_monitoring: $(enable_monitoring)
78 | table_name: "taximonitoring"
79 |
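
For the create-compute step, the SDK v2 counterpart of the parameters used above (cluster name, size, instance bounds, tier) looks roughly like this sketch; the placeholders are assumptions:

```python
# Sketch: provision the cpu-cluster compute target with the v2 SDK.
from azure.ai.ml import MLClient
from azure.ai.ml.entities import AmlCompute
from azure.identity import DefaultAzureCredential

ml_client = MLClient(DefaultAzureCredential(), "<sub-id>", "<rg>", "<workspace>")
compute = AmlCompute(name="cpu-cluster", size="Standard_DS3_v2",
                     min_instances=0, max_instances=4, tier="low_priority")
ml_client.compute.begin_create_or_update(compute).result()
```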
--------------------------------------------------------------------------------
/classical/python-sdk-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-online-endpoint-pipeline
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 | - name: version
14 | value: python-sdk-v2
15 | - name: endpoint_name
16 | value: taxi-online-$(namespace)$(postfix)$(environment)
17 |
18 | trigger: none
19 |
20 | pool:
21 | vmImage: ubuntu-20.04
22 |
23 | resources:
24 | repositories:
25 | - repository: mlops-templates # Template Repo
26 | name: mlops-templates
27 | type: git
28 | ref: main
29 |
30 | stages:
31 | - stage: CreateOnlineEndpoint
32 | displayName: Create/Update Online Endpoint
33 | jobs:
34 | - job: DeployOnlineEndpoint
35 | steps:
36 | - checkout: self
37 | path: s/
38 | - task: Bash@3
39 | displayName: "Create checkout repository folder(s)"
40 | inputs:
41 | targetType: "inline"
42 | script: |
43 | set -e
44 | mkdir "$(Build.Repository.Name)"
45 | mkdir "mlops-templates"
46 | - checkout: mlops-templates
47 | path: s/templates/
48 | - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
49 | - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
50 | - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
51 | - template: templates/${{ variables.version }}/create-online-endpoint.yml@mlops-templates
52 | parameters:
53 | endpoint_name: "${{ variables.endpoint_name }}"
54 | endpoint_description: "Taxi Online Endpoint"
55 | auth_mode: "aml_token"
56 | - template: templates/${{ variables.version }}/create-online-deployment.yml@mlops-templates
57 | parameters:
58 | deployment_name: taxi-online-dp
59 | endpoint_name: "${{ variables.endpoint_name }}"
60 | model_path: "taxi-model@latest"
61 | traffic_allocation: 100
62 | - template: templates/${{ variables.version }}/test-online-endpoint.yml@mlops-templates
63 | parameters:
64 | endpoint_name: "${{ variables.endpoint_name }}"
65 | sample_request: data/taxi-request.json
66 |
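
The final test step posts data/taxi-request.json to the new endpoint. A hedged sketch of the same check through the v2 SDK (endpoint name and placeholders are assumptions):

```python
# Sketch: invoke an online endpoint with the sample request file.
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

ml_client = MLClient(DefaultAzureCredential(), "<sub-id>", "<rg>", "<workspace>")
response = ml_client.online_endpoints.invoke(
    endpoint_name="taxi-online-demo",
    request_file="data/taxi-request.json",
)
print(response)
```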
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/data-science/environment/train-conda.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - defaults
3 | - anaconda
4 | - conda-forge
5 | dependencies:
6 | - python=3.7.5
7 | - pip
8 | - pip:
9 | - azureml-mlflow==1.38.0
10 | - azureml-sdk==1.38.0
11 | - scikit-learn==0.24.1
12 | - pandas==1.2.1
13 | - joblib==1.0.0
14 | - matplotlib==3.3.3
15 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
16 | - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/data-science/environment/train-requirements.txt:
--------------------------------------------------------------------------------
1 | azureml-mlflow==1.38.0
2 | pyarrow==10.0.0
3 | scikit-learn==0.24.1
4 | pandas==1.2.1
5 | joblib==1.2.0
6 | matplotlib==3.3.3
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/data-science/experiment/requirements.txt:
--------------------------------------------------------------------------------
1 | azureml-mlflow==1.38.0
2 | scikit-learn==0.24.1
3 | pandas==1.2.1
4 | joblib==1.2.0
5 | matplotlib==3.3.3
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/data-science/src/prep/prep.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 | """
4 | Prepares raw data and provides training, validation and test datasets
5 | """
6 |
7 | import argparse
8 |
9 | from pathlib import Path
10 | import os
11 | import numpy as np
12 | import pandas as pd
13 |
14 | import mlflow
15 |
16 | TARGET_COL = "cost"
17 |
18 | NUMERIC_COLS = [
19 | "distance",
20 | "dropoff_latitude",
21 | "dropoff_longitude",
22 | "passengers",
23 | "pickup_latitude",
24 | "pickup_longitude",
25 | "pickup_weekday",
26 | "pickup_month",
27 | "pickup_monthday",
28 | "pickup_hour",
29 | "pickup_minute",
30 | "pickup_second",
31 | "dropoff_weekday",
32 | "dropoff_month",
33 | "dropoff_monthday",
34 | "dropoff_hour",
35 | "dropoff_minute",
36 | "dropoff_second",
37 | ]
38 |
39 | CAT_NOM_COLS = [
40 | "store_forward",
41 | "vendor",
42 | ]
43 |
44 | CAT_ORD_COLS = [
45 | ]
46 |
47 | def parse_args():
48 | '''Parse input arguments'''
49 |
50 | parser = argparse.ArgumentParser("prep")
51 | parser.add_argument("--raw_data", type=str, help="Path to raw data")
52 | parser.add_argument("--train_data", type=str, help="Path to train dataset")
53 |     parser.add_argument("--val_data", type=str, help="Path to val dataset")
54 |     parser.add_argument("--test_data", type=str, help="Path to test dataset")
55 |
56 |     parser.add_argument("--enable_monitoring", type=str, default="false", help="enable logging to ADX")
57 | parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")
58 |
59 | args = parser.parse_args()
60 |
61 | return args
62 |
63 | def log_training_data(df, table_name):
64 | from obs.collector import Online_Collector
65 | collector = Online_Collector(table_name)
66 | collector.batch_collect(df)
67 |
68 | def main(args):
69 | '''Read, split, and save datasets'''
70 |
71 | # ------------ Reading Data ------------ #
72 | # -------------------------------------- #
73 |
74 | data = pd.read_csv((Path(args.raw_data)))
75 | data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]]
76 |
77 | # ------------- Split Data ------------- #
78 | # -------------------------------------- #
79 |
80 | # Split data into train, val and test datasets
81 |
82 | random_data = np.random.rand(len(data))
83 |
84 | msk_train = random_data < 0.7
85 | msk_val = (random_data >= 0.7) & (random_data < 0.85)
86 | msk_test = random_data >= 0.85
87 |
88 | train = data[msk_train]
89 | val = data[msk_val]
90 | test = data[msk_test]
91 |
92 | mlflow.log_metric('train size', train.shape[0])
93 | mlflow.log_metric('val size', val.shape[0])
94 | mlflow.log_metric('test size', test.shape[0])
95 |
96 | train.to_parquet((Path(args.train_data) / "train.parquet"))
97 | val.to_parquet((Path(args.val_data) / "val.parquet"))
98 | test.to_parquet((Path(args.test_data) / "test.parquet"))
99 |
100 | if (args.enable_monitoring.lower() == 'true' or args.enable_monitoring == '1' or args.enable_monitoring.lower() == 'yes'):
101 | log_training_data(data, args.table_name)
102 |
103 |
104 | if __name__ == "__main__":
105 |
106 | mlflow.start_run()
107 |
108 | # ---------- Parse Arguments ----------- #
109 | # -------------------------------------- #
110 |
111 | args = parse_args()
112 |
113 | lines = [
114 | f"Raw data path: {args.raw_data}",
115 | f"Train dataset output path: {args.train_data}",
116 | f"Val dataset output path: {args.val_data}",
117 | f"Test dataset path: {args.test_data}",
118 |
119 | ]
120 |
121 | for line in lines:
122 | print(line)
123 |
124 | main(args)
125 |
126 | mlflow.end_run()
127 |
128 |
129 |
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/data-science/src/register/register.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 | """
4 | Registers trained ML model if deploy flag is True.
5 | """
6 |
7 | import argparse
8 | from pathlib import Path
9 | import pickle
10 | import mlflow
11 |
12 | import os
13 | import json
14 |
15 | def parse_args():
16 | '''Parse input arguments'''
17 |
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--model_name', type=str, help='Name under which model will be registered')
20 | parser.add_argument('--model_path', type=str, help='Model directory')
21 | parser.add_argument('--evaluation_output', type=str, help='Path of eval results')
22 | parser.add_argument(
23 | "--model_info_output_path", type=str, help="Path to write model info JSON"
24 | )
25 | args, _ = parser.parse_known_args()
26 | print(f'Arguments: {args}')
27 |
28 | return args
29 |
30 |
31 | def main(args):
32 |     '''Loads model, registers it if deploy flag is True'''
33 |
34 | with open((Path(args.evaluation_output) / "deploy_flag"), 'rb') as infile:
35 | deploy_flag = int(infile.read())
36 |
37 | mlflow.log_metric("deploy flag", int(deploy_flag))
38 |
39 |     if deploy_flag == 1:
40 |
41 | print("Registering ", args.model_name)
42 |
43 | # load model
44 | model = mlflow.sklearn.load_model(args.model_path)
45 |
46 | # log model using mlflow
47 | mlflow.sklearn.log_model(model, args.model_name)
48 |
49 | # register logged model using mlflow
50 | run_id = mlflow.active_run().info.run_id
51 | model_uri = f'runs:/{run_id}/{args.model_name}'
52 | mlflow_model = mlflow.register_model(model_uri, args.model_name)
53 | model_version = mlflow_model.version
54 |
55 | # write model info
56 | print("Writing JSON")
57 |         model_info = {"id": "{0}:{1}".format(args.model_name, model_version)}
58 |         output_path = os.path.join(args.model_info_output_path, "model_info.json")
59 |         with open(output_path, "w") as of:
60 |             json.dump(model_info, fp=of)
61 |
62 | else:
63 | print("Model will not be registered!")
64 |
65 | if __name__ == "__main__":
66 |
67 | mlflow.start_run()
68 |
69 | # ---------- Parse Arguments ----------- #
70 | # -------------------------------------- #
71 |
72 | args = parse_args()
73 |
74 | lines = [
75 | f"Model name: {args.model_name}",
76 | f"Model path: {args.model_path}",
77 | f"Evaluation output path: {args.evaluation_output}",
78 | ]
79 |
80 | for line in lines:
81 | print(line)
82 |
83 | main(args)
84 |
85 | mlflow.end_run()
86 |
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/data/taxi-request.json:
--------------------------------------------------------------------------------
1 | {"input_data": [[2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57],
2 | [3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35]]
3 | }
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/azureml/deploy/batch/batch-deployment.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
2 | name: batch-dp
3 | endpoint_name: taxi-fare-batch
4 | model: azureml:taxi-model@latest
5 | compute: azureml:batch-cluster
6 | resources:
7 | instance_count: 1
8 | max_concurrency_per_instance: 2
9 | mini_batch_size: 10
10 | output_action: append_row
11 | output_file_name: predictions.csv
12 | retry_settings:
13 | max_retries: 3
14 | timeout: 30
15 | error_threshold: -1
16 | logging_level: info
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/azureml/deploy/batch/batch-endpoint.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/batchEndpoint.schema.json
2 | name: taxi-fare-batch
3 | description: taxi cost batch endpoint
4 | auth_mode: aad_token
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/azureml/deploy/online/online-deployment.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
2 | name: blue
3 | endpoint_name: taxi-fare-online
4 | model: azureml:taxi-model@latest
5 | instance_type: Standard_DS2_v2
6 | instance_count: 1
7 |
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/azureml/deploy/online/online-endpoint.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
2 | name: taxi-fare-online
3 | description: taxi cost online endpoint
4 | auth_mode: key
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/azureml/deploy/online/score.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/classical/rai-aml-cli-v2/mlops/azureml/deploy/online/score.py
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/azureml/train/train-env.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
2 | name: taxi-train-env
3 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
4 | conda_file: ../../../data-science/environment/train-conda.yml
5 | description: Environment created from a Docker image plus Conda environment to train taxi model.
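
This YAML builds the environment from a base image plus the conda file. A sketch of the equivalent registration through the v2 SDK (placeholders are assumptions; the conda_file path is resolved relative to where the script runs):

```python
# Sketch: register the same image+conda environment with the v2 SDK.
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Environment
from azure.identity import DefaultAzureCredential

ml_client = MLClient(DefaultAzureCredential(), "<sub-id>", "<rg>", "<workspace>")
env = Environment(
    name="taxi-train-env",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="data-science/environment/train-conda.yml",
    description="Environment created from a Docker image plus Conda environment.",
)
ml_client.environments.create_or_update(env)
```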
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-batch-endpoint-pipeline
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 | - name: version
14 | value: aml-cli-v2
15 | - name: endpoint_name
16 | value: taxi-batch-$(namespace)$(postfix)$(environment)
17 | - name: endpoint_type
18 | value: batch
19 |
20 | trigger: none
21 |
22 | pool:
23 | vmImage: ubuntu-20.04
24 |
25 | resources:
26 | repositories:
27 | - repository: mlops-templates # Template Repo
28 | name: mlops-templates
29 | type: git
30 | ref: main
31 |
32 | stages:
33 | - stage: CreateBatchEndpoint
34 | displayName: Create/Update Batch Endpoint
35 | jobs:
36 | - job: DeployBatchEndpoint
37 | steps:
38 | - checkout: self
39 | path: s/
40 | - task: Bash@3
41 | displayName: "Create checkout repository folder(s)"
42 | inputs:
43 | targetType: "inline"
44 | script: |
45 | set -e
46 | mkdir "$(Build.Repository.Name)"
47 | mkdir "mlops-templates"
48 | - checkout: mlops-templates
49 | path: s/templates/
50 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
51 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
52 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
53 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
54 | parameters:
55 | cluster_name: batch-cluster # name must match cluster name in deployment file below
56 | size: STANDARD_DS3_V2
57 | min_instances: 0
58 | max_instances: 5
59 | cluster_tier: dedicated
60 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
61 | parameters:
62 | endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
63 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
64 | parameters:
65 | deployment_name: taxi-batch-dp
66 | deployment_file: mlops/azureml/deploy/batch/batch-deployment.yml
67 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
68 | parameters:
69 | deployment_name: taxi-batch-dp
70 | sample_request: data/taxi-batch.csv
71 | request_type: uri_file #either uri_folder or uri_file
72 |
73 |
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-online-endpoint-pipeline
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 | - name: version
14 | value: aml-cli-v2
15 | - name: endpoint_name
16 | value: taxi-online-$(namespace)$(postfix)$(environment)
17 | - name: endpoint_type
18 | value: online
19 |
20 | trigger: none
21 |
22 | pool:
23 | vmImage: ubuntu-20.04
24 |
25 | resources:
26 | repositories:
27 | - repository: mlops-templates # Template Repo
28 | name: mlops-templates
29 | type: git
30 | ref: main
31 |
32 | stages:
33 | - stage: CreateOnlineEndpoint
34 | displayName: Create/Update Online Endpoint
35 | jobs:
36 | - job: DeployOnlineEndpoint
37 | steps:
38 | - checkout: self
39 | path: s/
40 | - task: Bash@3
41 | displayName: "Create checkout repository folder(s)"
42 | inputs:
43 | targetType: "inline"
44 | script: |
45 | set -e
46 | mkdir "$(Build.Repository.Name)"
47 | mkdir "mlops-templates"
48 | - checkout: mlops-templates
49 | path: s/templates/
50 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
51 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
52 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
53 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
54 | parameters:
55 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
56 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
57 | parameters:
58 | deployment_name: taxi-online-dp
59 | deployment_file: mlops/azureml/deploy/online/online-deployment.yml
60 | - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates
61 | parameters:
62 | traffic_allocation: taxi-online-dp=100
63 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
64 | parameters:
65 | deployment_name: taxi-online-dp
66 | sample_request: data/taxi-request.json
67 | request_type: json
68 |
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/devops-pipelines/register-rai-components.yml:
--------------------------------------------------------------------------------
1 | steps:
2 | - task: AzureCLI@2
3 | displayName: Register RAI components
4 | continueOnError: true
5 | inputs:
6 | azureSubscription: $(ado_service_connection_rg) #needs to have access at the RG level
7 | scriptType: bash
8 | workingDirectory: $(System.DefaultWorkingDirectory)
9 | scriptLocation: inlineScript
10 | inlineScript: |
11 | subscription_id=$(az account list --query "[?isDefault].id | [0]" --output tsv)
12 | chmod u+x quick-setup.bash
13 | bash quick-setup.bash conda-env $subscription_id $(resource_group) $(aml_workspace)
14 |
--------------------------------------------------------------------------------
/classical/rai-aml-cli-v2/mlops/devops-pipelines/trigger_.code-search:
--------------------------------------------------------------------------------
1 | # Query: trigger:
2 | # ContextLines: 1
3 |
4 | 20 results - 20 files
5 |
6 | classical/aml-cli-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml:
7 | 17
8 | 18: trigger: none
9 |
10 | classical/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
11 | 13
12 | 14: trigger: none
13 |
14 | classical/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
15 | 17
16 | 18: trigger: none
17 |
18 | classical/python-sdk-v1/mlops/devops-pipelines/deploy-drift-detection.yml:
19 | 14
20 | 15: trigger: none
21 |
22 | classical/python-sdk-v1/mlops/devops-pipelines/deploy-model-batch-scoring.yml:
23 | 14
24 | 15: trigger: none
25 |
26 | classical/python-sdk-v1/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
27 | 14
28 | 15: trigger: none
29 |
30 | classical/python-sdk-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml:
31 | 15
32 | 16: trigger: none
33 |
34 | classical/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
35 | 13
36 | 14: trigger: none
37 |
38 | classical/python-sdk-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
39 | 15
40 | 16: trigger: none
41 |
42 | classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml:
43 | 17
44 | 18: trigger: none
45 |
46 | classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
47 | 13
48 | 14: trigger: none
49 |
50 | classical/rai-aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
51 | 17
52 | 18: trigger: none
53 |
54 | cv/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
55 | 13
56 | 14: trigger: none
57 |
58 | cv/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
59 | 17
60 | 18: trigger: none
61 |
62 | cv/python-sdk-v1/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
63 | 14
64 | 15: trigger: none
65 |
66 | infrastructure/bicep/pipelines/bicep-ado-deploy-infra.yml:
67 | 11
68 | 12: trigger: none
69 |
70 | infrastructure/terraform/devops-pipelines/tf-ado-deploy-infra.yml:
71 | 11
72 | 12: trigger: none
73 |
74 | nlp/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
75 | 13
76 | 14: trigger: none
77 |
78 | nlp/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
79 | 17
80 | 18: trigger: none
81 |
82 | nlp/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
83 | 13
84 | 14: trigger: none
85 |
--------------------------------------------------------------------------------
/config-infra-dev.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | # Dev environment
5 | variables:
6 | # Global
7 | ap_vm_image: ubuntu-20.04
8 |
9 | namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
10 | postfix: 0001
11 | location: eastus
12 | environment: dev
13 | enable_aml_computecluster: true
14 | enable_monitoring: false
15 |
16 | # Azure DevOps
17 | ado_service_connection_rg: Azure-ARM-Dev
18 | ado_service_connection_aml_ws: Azure-ARM-Dev
19 |
20 | # DO NOT TOUCH
21 |
22 | # For pipeline reference
23 | resource_group: rg-$(namespace)-$(postfix)$(environment)
24 | aml_workspace: mlw-$(namespace)-$(postfix)$(environment)
25 | application_insights: mlw-$(namespace)-$(postfix)$(environment)
26 | key_vault: kv-$(namespace)-$(postfix)$(environment)
27 | container_registry: cr$(namespace)$(postfix)$(environment)
28 | storage_account: st$(namespace)$(postfix)$(environment)
29 |
30 | # For terraform reference
31 | terraform_version: 1.3.6
32 | terraform_workingdir: infrastructure
33 | terraform_st_location: $(location)
34 | terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf
35 | terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf
36 | terraform_st_container_name: default
37 | terraform_st_key: mlops-tab
38 |
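
The namespace-length note above matters because the derived storage_account name concatenates "st" + namespace + postfix + environment and must stay within Azure's 24-character storage-account limit. A quick arithmetic check:

```python
# Sketch: verify the derived storage account name fits the 24-char limit.
namespace, postfix, environment = "mlopsv2", "0001", "dev"
storage_account = f"st{namespace}{postfix}{environment}"
assert len(storage_account) <= 24, f"{storage_account} exceeds 24 chars"
print(storage_account, len(storage_account))  # stmlopsv20001dev 16
```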
--------------------------------------------------------------------------------
/config-infra-prod.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | # Prod environment
5 | variables:
6 | # Global
7 | ap_vm_image: ubuntu-20.04
8 |
9 | namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
10 | postfix: 0001
11 | location: eastus
12 | environment: prod
13 | enable_aml_computecluster: true
14 | enable_monitoring: false
15 |
16 | # Azure DevOps
17 | ado_service_connection_rg: Azure-ARM-Prod
18 | ado_service_connection_aml_ws: Azure-ARM-Prod
19 |
20 | # DO NOT TOUCH
21 |
22 | # For pipeline reference
23 | resource_group: rg-$(namespace)-$(postfix)$(environment)
24 | aml_workspace: mlw-$(namespace)-$(postfix)$(environment)
25 | application_insights: mlw-$(namespace)-$(postfix)$(environment)
26 | key_vault: kv-$(namespace)-$(postfix)$(environment)
27 | container_registry: cr$(namespace)$(postfix)$(environment)
28 | storage_account: st$(namespace)$(postfix)$(environment)
29 |
30 | # For terraform reference
31 | terraform_version: 1.3.6
32 | terraform_workingdir: infrastructure
33 | terraform_st_location: $(location)
34 | terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf
35 | terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf
36 | terraform_st_container_name: default
37 | terraform_st_key: mlops-tab
38 |
--------------------------------------------------------------------------------
/cv/README.md:
--------------------------------------------------------------------------------
1 | # Computer Vision
2 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/environment/Dockerfile:
--------------------------------------------------------------------------------
1 | # check release notes https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
2 | FROM nvcr.io/nvidia/pytorch:22.04-py3
3 |
4 | ##############################################################################
5 | # NCCL TESTS
6 | ##############################################################################
7 | ENV NCCL_TESTS_TAG=v2.11.0
8 |
9 | # NOTE: adding gencodes to support K80, M60, V100, A100
10 | RUN mkdir /tmp/nccltests && \
11 | cd /tmp/nccltests && \
12 | git clone -b ${NCCL_TESTS_TAG} https://github.com/NVIDIA/nccl-tests.git && \
13 | cd nccl-tests && \
14 | make \
15 | MPI=1 MPI_HOME=/opt/hpcx/ompi \
16 | NVCC_GENCODE="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_80,code=sm_80" \
17 | CUDA_HOME=/usr/local/cuda && \
18 | cp ./build/* /usr/local/bin && \
19 | rm -rf /tmp/nccltests
20 |
21 | # Install dependencies missing in this container
22 | # NOTE: container already has matplotlib==3.5.1 tqdm==4.62.0
23 | COPY requirements.txt ./
24 | RUN pip install -r requirements.txt
25 |
26 | # RUN python -m pip install azureml-defaults==1.41.0 \
27 | # mlflow==1.25.1 \
28 | # azureml-mlflow==1.41.0 \
29 | # transformers==4.18.0 \
30 | # psutil==5.9.0
31 |
32 | # add ndv4-topo.xml
33 | RUN mkdir /opt/microsoft/
34 | ADD ./ndv4-topo.xml /opt/microsoft
35 |
36 | # to use on A100, enable env var below in your job
37 | # ENV NCCL_TOPO_FILE="/opt/microsoft/ndv4-topo.xml"
38 |
39 | # adjusts the verbosity of NCCL logging
40 | ENV NCCL_DEBUG="INFO"
41 | ENV NCCL_DEBUG_SUBSYS="GRAPH,INIT,ENV"
42 |
43 | # Relaxed Ordering can greatly help the performance of Infiniband networks in virtualized environments.
44 | ENV NCCL_IB_PCI_RELAXED_ORDERING="1"
45 | ENV CUDA_DEVICE_ORDER="PCI_BUS_ID"
46 | ENV NCCL_SOCKET_IFNAME="eth0"
47 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/environment/ndv4-topo.xml:
--------------------------------------------------------------------------------
[XML content stripped in this dump: the file defines the NCCL node topology for Azure NDv4 (A100) VMs, referenced by the Dockerfile above; it is not reconstructed here]
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/environment/requirements.txt:
--------------------------------------------------------------------------------
1 | # for local testing (cpu)
 2 | torchvision==0.14.1  # paired with torch 1.13.1 (torchvision 0.12.0 pins torch 1.11.0 and would conflict)
3 | torch==1.13.1
4 | transformers==4.18.0
5 |
6 | # for metrics reporting/plotting
7 | mlflow==2.3.1
8 | azureml-mlflow==1.41.0
9 | matplotlib==3.5.2
10 | tqdm==4.64.0
11 | psutil==5.9.0
12 |
13 | # for unit testing
14 | pytest==7.1.2
15 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/requirements-tests.txt:
--------------------------------------------------------------------------------
1 | # NOTE: install these requirements to run the unit tests
2 |
3 | # CV packages
 4 | torchvision==0.14.1  # paired with torch 1.13.1 (torchvision 0.12.0 pins torch 1.11.0 and would conflict)
5 | torch==1.13.1
6 | transformers==4.18.0
7 |
8 | # for metrics reporting/plotting
9 | mlflow==2.3.1
10 | azureml-mlflow==1.41.0
11 | matplotlib==3.5.2
12 | tqdm==4.64.0
13 | psutil==5.9.0
14 |
15 | # for unit testing
16 | pytest==7.1.2
17 | pytest-cov==2.12.1
18 |
19 | # Fix: force protobuf downgrade to avoid exception
20 | protobuf==3.20.2
21 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/src/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .model_loader import MODEL_ARCH_LIST, get_model_metadata, load_model
2 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/src/model/swin_models.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT license.
3 | # Original Author: Jeff Omhover (MSFT)
4 |
5 | """
 6 | This script provides code to load and set up Swin Transformer image-classification models from HuggingFace transformers.
7 | """
8 | import logging
9 |
10 | import torch
11 | from transformers import SwinConfig, SwinForImageClassification
12 |
13 |
14 | def load_swin_model(
15 | model_arch: str, output_dimension: int = 1, pretrained: bool = True
16 | ):
17 | """Loads a model from a given arch and sets it up for training"""
18 | logger = logging.getLogger(__name__)
19 |
20 | logger.info(
21 | f"Loading model from arch={model_arch} pretrained={pretrained} output_dimension={output_dimension}"
22 | )
23 | if pretrained:
24 | model = SwinForImageClassification.from_pretrained(model_arch)
25 | else:
26 | model = SwinForImageClassification(config=SwinConfig())
27 |
28 | model.classifier = torch.nn.Linear(model.swin.num_features, output_dimension)
29 |
30 | return model
31 |
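A minimal usage sketch for load_swin_model above. With pretrained=False the model_arch is effectively ignored (a default SwinConfig is used), which keeps the check offline; the checkpoint id shown is illustrative:

    import torch

    model = load_swin_model(
        "microsoft/swin-tiny-patch4-window7-224",  # illustrative HF checkpoint id
        output_dimension=120,  # e.g. Stanford Dogs has 120 classes
        pretrained=False,      # avoids a download; builds from a default SwinConfig
    )
    logits = model(torch.randn(1, 3, 224, 224)).logits
    assert logits.shape == (1, 120)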
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/src/model/test_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT license.
3 | # Original Author: Jeff Omhover (MSFT)
4 |
5 | """
6 | Creates a super simple 32x32 CNN model for testing.
7 | From the CIFAR10 tutorial https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
8 | """
9 | import logging
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 |
15 |
16 | class Net(nn.Module):
17 | def __init__(self, output_dimension):
18 | super().__init__()
19 | self.conv1 = nn.Conv2d(3, 6, 5)
20 | self.pool = nn.MaxPool2d(2, 2)
21 | self.conv2 = nn.Conv2d(6, 16, 5)
22 | self.fc1 = nn.Linear(16 * 5 * 5, 120)
23 | self.fc2 = nn.Linear(120, 84)
24 | self.fc3 = nn.Linear(84, output_dimension)
25 |
26 | def forward(self, x):
27 | x = self.pool(F.relu(self.conv1(x)))
28 | x = self.pool(F.relu(self.conv2(x)))
29 | x = torch.flatten(x, 1) # flatten all dimensions except batch
30 | x = F.relu(self.fc1(x))
31 | x = F.relu(self.fc2(x))
32 | x = self.fc3(x)
33 | return x
34 |
35 |
36 | def load_test_model(
37 | model_arch: str, output_dimension: int = 1, pretrained: bool = True
38 | ):
39 | """Loads a model from a given arch and sets it up for training"""
40 | return Net(output_dimension)
41 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/src/model/torchvision_models.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT license.
3 | # Original Author: Jeff Omhover (MSFT)
4 |
5 | """
 6 | This script provides code to load and set up a variety of models from torchvision.models.
7 | """
8 | import logging
9 |
10 | import torch
11 | import torchvision.models as models
12 |
13 |
14 | def load_torchvision_model(
15 | model_arch: str, output_dimension: int = 1, pretrained: bool = True
16 | ):
17 | """Loads a model from a given arch and sets it up for training"""
18 | logger = logging.getLogger(__name__)
19 |
20 | logger.info(
21 | f"Loading model from arch={model_arch} pretrained={pretrained} output_dimension={output_dimension}"
22 | )
23 | if hasattr(models, model_arch):
24 | model = getattr(models, model_arch)(pretrained=pretrained)
25 | else:
26 | raise NotImplementedError(
27 | f"model_arch={model_arch} is not implemented in torchvision model zoo."
28 | )
29 |
30 | # see https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
31 | if model_arch.startswith("resnet"):
32 | model.fc = torch.nn.Linear(model.fc.in_features, output_dimension)
33 | elif model_arch == "alexnet":
34 | model.classifier[6] = torch.nn.Linear(4096, output_dimension)
35 | elif model_arch.startswith("vgg"):
36 | model.classifier[6] = torch.nn.Linear(4096, output_dimension)
37 | elif model_arch.startswith("densenet"):
38 | model.classifier = torch.nn.Linear(1024, output_dimension)
39 | else:
40 | raise NotImplementedError(
41 | f"loading model_arch={model_arch} is not implemented yet in our custom code."
42 | )
43 |
44 | return model
45 |
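And a matching offline sketch for the torchvision loader, including its fail-fast path:

    import torch

    model = load_torchvision_model("resnet18", output_dimension=120, pretrained=False)
    out = model(torch.randn(1, 3, 224, 224))
    assert out.shape == (1, 120)

    try:
        load_torchvision_model("not_a_model")
    except NotImplementedError:
        pass  # unknown arch names raise rather than silently misloading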
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import logging
4 | import pytest
5 | import tempfile
6 | from unittest.mock import Mock
7 |
8 | SRC_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src"))
9 |
10 | if SRC_ROOT not in sys.path:
11 | logging.info(f"Adding {SRC_ROOT} to path")
12 | sys.path.append(str(SRC_ROOT))
13 |
14 |
15 | @pytest.fixture()
16 | def temporary_dir():
17 | """Creates a temporary directory for the tests"""
18 | temp_directory = tempfile.TemporaryDirectory()
19 | yield temp_directory.name
20 | temp_directory.cleanup()
21 |
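A minimal sketch of a test consuming the temporary_dir fixture (the test itself is hypothetical):

    import os

    def test_writes_labels_file(temporary_dir):
        out_path = os.path.join(temporary_dir, "labels.csv")
        with open(out_path, "w") as out_file:
            out_file.write("path,label\n")
        assert os.path.isfile(out_path)  # directory is cleaned up by the fixture afterwards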
--------------------------------------------------------------------------------
/cv/aml-cli-v2/data-science/tests/model/test_model_loader.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests running the model loader for every possible model in the list
3 | """
4 | import pytest
5 | import torch
6 |
7 | # local imports
8 | from model import (
9 | MODEL_ARCH_LIST,
10 | get_model_metadata,
11 | load_model,
12 | )
13 |
14 | # IMPORTANT: see conftest.py for fixtures
15 |
16 |
17 | @pytest.mark.parametrize("model_arch", MODEL_ARCH_LIST)
18 | def test_model_loader(model_arch):
19 | """Tests src/components/pytorch_image_classifier/model/"""
20 | model_metadata = get_model_metadata(model_arch)
21 |
22 | assert model_metadata is not None
23 | assert isinstance(model_metadata, dict)
24 | assert "library" in model_metadata
25 | assert "input_size" in model_metadata
26 |
27 | # using pretrained=False to avoid downloading each time we unit test
28 | model = load_model(model_arch, output_dimension=4, pretrained=False)
29 |
30 | assert model is not None
31 | assert isinstance(model, torch.nn.Module)
32 |
33 |
34 | def test_model_loader_failure():
35 | """Test asking for a model that deosn't exist"""
36 | with pytest.raises(NotImplementedError):
37 | get_model_metadata("not_a_model")
38 |
39 | with pytest.raises(NotImplementedError):
40 | load_model("not_a_model", output_dimension=4, pretrained=False)
41 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/mlops/azureml/deploy/online/online-deployment.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
2 | name: blue
3 | endpoint_name: dogs-classifier-online
4 | model: azureml:resnet-dogs-classifier@latest
5 | instance_type: Standard_DS2_v2
6 | instance_count: 1
7 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/mlops/azureml/deploy/online/online-endpoint.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
2 | name: dogs-classifier-online
3 | description: Stanford Dogs Classifier
4 | auth_mode: key
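
The endpoint and deployment YAML above can also be applied from Python with the v2 SDK; a minimal sketch, assuming azure-ai-ml is installed and the placeholder ids are replaced (the model reference mirrors the YAML; a Model object or an "azureml:<name>:<version>" string also works):

    from azure.identity import DefaultAzureCredential
    from azure.ai.ml import MLClient
    from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment

    ml_client = MLClient(DefaultAzureCredential(),
                         "<subscription-id>", "<resource-group>", "<workspace>")

    endpoint = ManagedOnlineEndpoint(name="dogs-classifier-online",
                                     description="Stanford Dogs Classifier",
                                     auth_mode="key")
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()

    deployment = ManagedOnlineDeployment(name="blue",
                                         endpoint_name="dogs-classifier-online",
                                         model="azureml:resnet-dogs-classifier@latest",
                                         instance_type="Standard_DS2_v2",
                                         instance_count=1)
    ml_client.online_deployments.begin_create_or_update(deployment).result()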
--------------------------------------------------------------------------------
/cv/aml-cli-v2/mlops/azureml/train/create_stanford_dogs_dataset.yaml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
2 |
3 | command: |
4 | tar xvfm ${{inputs.archive}} --no-same-owner -C ${{outputs.images}} #TODO: Split data into Train-Validate-Test
5 |
6 | inputs:
7 | archive:
8 | type: uri_file
9 | path: http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar
10 |
11 | outputs:
12 | images:
13 | type: uri_folder
14 | mode: upload
15 | path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/dogs/
16 |
17 | environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest
18 |
19 | compute: azureml:cpu-cluster
20 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/mlops/azureml/train/pipeline.yaml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
2 | type: pipeline
3 |
4 | #
5 | inputs:
6 | training_images:
7 | type: uri_folder
8 | mode: download # pick ro_mount, rw_mount or download
9 | path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/dogs/**
10 |     # path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/places2/train/**
11 | validation_images: #TODO: Use different datasets for validation
12 | type: uri_folder
13 | mode: download # pick ro_mount, rw_mount or download
14 | path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/dogs/**
15 | # path: azureml://datastores/workspaceblobstore/paths/azureml-vision-datasets/places2/valid/**
16 | #
17 |
18 | #
19 | settings:
20 | default_datastore: azureml:workspaceblobstore
21 | continue_on_step_failure: true
22 |
23 | jobs:
24 | train:
25 | type: command
26 | component: file:train.yaml
27 | compute: azureml:gpu-cluster
28 | resources:
29 | instance_count: 1 # number of nodes
30 | distribution:
31 | type: pytorch
32 | process_count_per_instance: 1 # number of gpus
33 |
34 | # NOTE: set env var if needed
35 | environment_variables:
36 |       NCCL_DEBUG: "INFO" # adjusts the verbosity of NCCL logging
37 |
38 | # NCCL_TOPO_FILE: "/opt/microsoft/ndv4-topo.xml" # Use specific topology file for A100
39 |
40 | # NCCL_IB_PCI_RELAXED_ORDERING: "1" # Relaxed Ordering can greatly help the performance of Infiniband networks in virtualized environments.
41 | # NCCL_IB_DISABLE: "1" # force disable infiniband (if set to "1")
42 | # NCCL_NET_PLUGIN: "none" # to force NET/Plugin off (no rdma/sharp plugin at all)
43 | # NCCL_NET: "Socket" # to force node-to-node comm to use Socket (slow)
44 | # NCCL_SOCKET_IFNAME: "eth0" # to force Socket comm to use eth0 (use NCCL_NET=Socket)
45 |
46 | # UCX_IB_PCI_RELAXED_ORDERING: "on"
47 | # UCX_TLS: "tcp"
48 | # UCX_NET_DEVICES: "eth0" # if you have Error: Failed to resolve UCX endpoint...
49 |
50 | # CUDA_DEVICE_ORDER: "PCI_BUS_ID" # ordering of gpus
51 |
52 | # TORCH_DISTRIBUTED_DEBUG: "DETAIL"
53 |
54 | inputs:
55 | # data inputs
56 | train_images: ${{parent.inputs.training_images}}
57 | valid_images: ${{parent.inputs.validation_images}}
58 |
59 | # data loading
60 | batch_size: 64
61 | num_workers: 5
62 | prefetch_factor: 4
63 | persistent_workers: true
64 | pin_memory: true
65 | non_blocking: false
66 |
67 | # model
68 | model_arch: "resnet18"
69 | model_arch_pretrained: true
70 |
71 | # training
72 | num_epochs: 1
73 | learning_rate: 0.001
74 | momentum: 0.9
75 |
76 | # profiling
77 | enable_profiling: false
78 | # multiprocessing_sharing_strategy: "file_system" # WARNING: this can cause hang at job completion
79 |
80 |       # Model Registration
81 | register_model_as: "resnet-dogs-classifier"
82 |
83 | #
84 |
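For orientation, the distribution block above determines the torch.distributed world size; a plain-Python sketch of the arithmetic (values mirror the YAML):

    instance_count = 1               # resources.instance_count (nodes)
    process_count_per_instance = 1   # GPUs per node
    world_size = instance_count * process_count_per_instance  # 1 here

    # With distribution type "pytorch", AzureML typically launches one process per
    # GPU and exports MASTER_ADDR/MASTER_PORT/RANK/LOCAL_RANK/WORLD_SIZE for
    # torch.distributed.init_process_group(backend="nccl") to consume.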
--------------------------------------------------------------------------------
/cv/aml-cli-v2/mlops/azureml/train/train-env.yaml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
2 | name: nvidia_pytorch
3 | build:
4 | path: ../../../data-science/environment/
5 | tags:
6 | os: ubuntu
7 | os_version: 20.04
8 | hpcx: 2.10
9 | mpi: openmpi
10 | mpi_version: 4.1.2rc4
11 | ucx: 1.12.0
12 | cuda: 11.6.2
13 | cudnn: 8.4.0.27
14 | nccl: 2.12.10
15 | rdma_core: 36.0
16 | nsight_compute: 2022.1.1.2
17 | nsight_systems: "2022.2.1.31-5fe97ab"
18 | nccl_test: 2.11.0
19 | azureml-defaults: 1.41.0
20 | mlflow: 1.25.1
21 | transformers: 4.18.0
--------------------------------------------------------------------------------
/cv/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-model-training-pipeline
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 | - name: version
14 | value: aml-cli-v2
15 |
16 | trigger: none
17 |
18 | pool:
19 | vmImage: ubuntu-20.04
20 |
21 | resources:
22 | repositories:
23 | - repository: mlops-templates # Template Repo
24 | name: mlops-templates
25 | type: git
26 | ref: main
27 |
28 | stages:
29 | - stage: DeployTrainingPipeline
30 | displayName: Deploy Training Pipeline
31 | jobs:
32 | - job: DeployTrainingPipeline
33 | steps:
34 | - checkout: self
35 | path: s/
36 | - task: Bash@3
37 | displayName: "Create checkout repository folder(s)"
38 | inputs:
39 | targetType: "inline"
40 | script: |
41 | set -e
42 | mkdir "$(Build.Repository.Name)"
43 | mkdir "mlops-templates"
44 | - checkout: mlops-templates
45 | path: s/templates/
46 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
47 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
48 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
49 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
50 | parameters:
51 | cluster_name: gpu-cluster
52 | size: Standard_NC6
53 | min_instances: 0
54 | max_instances: 1
55 | cluster_tier: dedicated
56 | - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
57 | parameters:
58 | build_type: docker
59 | environment_name: nvidia_pytorch # Not used for docker builds
60 | environment_file: mlops/azureml/train/train-env.yaml
61 | - template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates
62 | parameters:
63 | data_type: training
64 | environment_file: mlops/azureml/train/create_stanford_dogs_dataset.yaml
65 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
66 | parameters:
67 | pipeline_file: mlops/azureml/train/pipeline.yaml
68 | experiment_name: $(environment)_cv_train_$(Build.SourceBranchName)
69 | display_name: $(environment)_cv_run_$(Build.BuildID)
70 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | variables:
5 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
6 | # 'main' branch: PRD environment
7 | - template: ../../config-infra-prod.yml
8 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
9 | # 'develop' or feature branches: DEV environment
10 | - template: ../../config-infra-dev.yml
11 | - name: version
12 | value: aml-cli-v2
13 | - name: endpoint_name
14 | value: dogs-online-$(namespace)$(postfix)$(environment)
15 | - name: endpoint_type
16 | value: online
17 |
18 | trigger: none
19 |
20 | pool:
21 | vmImage: ubuntu-20.04
22 |
23 | resources:
24 | repositories:
25 | - repository: mlops-templates # Template Repo
26 | name: mlops-templates
27 | type: git
28 | ref: main
29 |
30 | stages:
31 | - stage: CreateOnlineEndpoint
32 | displayName: Create/Update Online Endpoint
33 | jobs:
34 | - job: DeployOnlineEndpoint
35 | steps:
36 | - checkout: self
37 | path: s/
38 | - task: Bash@3
39 | displayName: "Create checkout repository folder(s)"
40 | inputs:
41 | targetType: "inline"
42 | script: |
43 | set -e
44 | mkdir "$(Build.Repository.Name)"
45 | mkdir "mlops-templates"
46 | - checkout: mlops-templates
47 | path: s/templates/
48 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
49 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
50 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
51 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
52 | parameters:
53 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
54 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
55 | parameters:
56 | deployment_name: dogs-online-dp
57 | deployment_file: mlops/azureml/deploy/online/online-deployment.yml
58 | - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates
59 | parameters:
60 | traffic_allocation: dogs-online-dp=100
61 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
62 | parameters:
63 | deployment_name: dogs-online-dp
64 | sample_request: data/sample-request.json
65 | request_type: json
66 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/mlops/github-actions/deploy-model-training-pipeline.yml:
--------------------------------------------------------------------------------
1 | name: deploy-cv-model-training-pipeline
2 |
3 | on:
4 | workflow_dispatch:
5 | jobs:
6 | set-env-branch:
7 | runs-on: ubuntu-latest
8 | outputs:
9 | config-file: ${{ steps.set-output-defaults.outputs.config-file }}
10 | steps:
11 | - id: set-prod-branch
12 | name: set-prod-branch
13 | if: ${{ github.ref == 'refs/heads/main'}}
14 | run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV;
15 | - id: set-dev-branch
16 |         name: set-dev-branch
17 | if: ${{ github.ref != 'refs/heads/main'}}
18 | run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV;
19 | - id: set-output-defaults
20 | name: set-output-defaults
21 | run: |
22 | echo "config-file=$config_env" >> $GITHUB_OUTPUT;
23 | get-config:
24 | needs: set-env-branch
25 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main
26 | with:
27 | file_name: ${{ needs.set-env-branch.outputs.config-file}}
28 | create-dataprep-compute:
29 | needs: [get-config]
30 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main
31 | with:
32 | cluster_name: cpu-cluster
33 | size: Standard_DS3_v2
34 | min_instances: 0
35 | max_instances: 4
36 | cluster_tier: low_priority
37 | resource_group: ${{ needs.get-config.outputs.resource_group }}
38 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
39 | secrets:
40 | creds: ${{secrets.AZURE_CREDENTIALS}}
41 | create-training-compute:
42 | needs: get-config
43 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main
44 | with:
45 | cluster_name: gpu-cluster
46 | size: Standard_NC6
47 | min_instances: 0
48 | max_instances: 1
49 | cluster_tier: low_priority
50 | resource_group: ${{ needs.get-config.outputs.resource_group }}
51 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
52 | secrets:
53 | creds: ${{secrets.AZURE_CREDENTIALS}}
54 | register-environment:
55 | needs: [get-config, create-dataprep-compute, create-training-compute]
56 | uses: Azure/mlops-templates/.github/workflows/register-environment.yml@main
57 | with:
58 | resource_group: ${{ needs.get-config.outputs.resource_group }}
59 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
60 | environment_file: mlops/azureml/train/train-env.yaml
61 | secrets:
62 | creds: ${{secrets.AZURE_CREDENTIALS}}
63 | register-dataset:
64 | needs: [get-config, register-environment]
65 | uses: Azure/mlops-templates/.github/workflows/register-dataset.yml@main
66 | with:
67 | resource_group: ${{ needs.get-config.outputs.resource_group }}
68 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
69 | data_file: mlops/azureml/train/create_stanford_dogs_dataset.yaml
70 | file_type: Training
71 | name: stanford_dogs
72 | secrets:
73 | creds: ${{secrets.AZURE_CREDENTIALS}}
74 | run-model-training-pipeline:
75 | needs:
76 | [
77 | get-config,
78 | create-dataprep-compute,
79 | create-training-compute,
80 | register-environment,
81 | register-dataset,
82 | ]
83 | uses: Azure/mlops-templates/.github/workflows/run-pipeline.yml@main
84 | with:
85 | resource_group: ${{ needs.get-config.outputs.resource_group }}
86 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
87 | parameters-file: mlops/azureml/train/pipeline.yaml
88 | job-name: cv-train
89 | secrets:
90 | creds: ${{secrets.AZURE_CREDENTIALS}}
91 |
--------------------------------------------------------------------------------
/cv/aml-cli-v2/mlops/github-actions/deploy-online-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | name: deploy-online-endpoint-pipeline
2 |
3 | on:
4 | workflow_dispatch:
5 | jobs:
6 | set-env-branch:
7 | runs-on: ubuntu-latest
8 | outputs:
9 | config-file: ${{ steps.set-output-defaults.outputs.config-file }}
10 | steps:
11 | - id: set-prod-branch
12 | name: set-prod-branch
13 | if: ${{ github.ref == 'refs/heads/main'}}
14 | run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV;
15 | - id: set-dev-branch
16 |         name: set-dev-branch
17 | if: ${{ github.ref != 'refs/heads/main'}}
18 | run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV;
19 | - id: set-output-defaults
20 | name: set-output-defaults
21 | run: |
22 | echo "config-file=$config_env" >> $GITHUB_OUTPUT;
23 | get-config:
24 | needs: set-env-branch
25 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main
26 | with:
27 | file_name: ${{ needs.set-env-branch.outputs.config-file}}
28 | create-endpoint:
29 | needs: get-config
30 | uses: Azure/mlops-templates/.github/workflows/create-endpoint.yml@main
31 | with:
32 | resource_group: ${{ needs.get-config.outputs.resource_group }}
33 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
34 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
35 | endpoint_name: dogs-classifier-online2
36 | endpoint_type: online
37 | secrets:
38 | creds: ${{secrets.AZURE_CREDENTIALS}}
39 | create-deployment:
40 | uses: Azure/mlops-templates/.github/workflows/create-deployment.yml@main
41 | needs: [get-config, create-endpoint]
42 | with:
43 | resource_group: ${{ needs.get-config.outputs.resource_group }}
44 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
45 | endpoint_file: mlops/azureml/deploy/online/online-deployment.yml
46 | endpoint_name: dogs-classifier-online2
47 | endpoint_type: online
48 | deployment_name: dogs-online-dp
49 | secrets:
50 | creds: ${{secrets.AZURE_CREDENTIALS}}
51 | allocate-traffic:
52 | uses: Azure/mlops-templates/.github/workflows/allocate-traffic.yml@main
53 | needs: [get-config, create-deployment]
54 | with:
55 | resource_group: ${{ needs.get-config.outputs.resource_group }}
56 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
57 | traffic_allocation: dogs-online-dp=100
58 | endpoint_name: dogs-classifier-online2
59 | secrets:
60 | creds: ${{secrets.AZURE_CREDENTIALS}}
61 |
--------------------------------------------------------------------------------
/cv/python-sdk-v1/config-aml.yml:
--------------------------------------------------------------------------------
1 | variables:
2 |
3 | ap_vm_image: ubuntu-20.04
4 |
5 | ## Training pipeline settings
6 |
7 | # Training dataset settings
8 | training_dataset_name: dogs-imgs
9 | training_dataset_description: 'Stanford Dogs Dataset (http://vision.stanford.edu/aditya86/ImageNetDogs/)'
10 | training_dataset_local_path: data/training-imgs/
11 | training_dataset_path_on_datastore: dogs-imgs/
12 | training_dataset_type: local
13 | training_dataset_storage_url: 'http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar'
14 |
15 | labels_dataset_name: dogs-labels
16 | labels_dataset_description: 'Labels for Stanford Dogs Dataset (http://vision.stanford.edu/aditya86/ImageNetDogs/)'
17 | labels_dataset_local_path: data/training/
18 | labels_dataset_path_on_datastore: dogs-labels/
19 | labels_dataset_type: local
20 |
21 | # Training AzureML Environment settings
22 | training_env_name: nvidia_pytorch
23 | training_env_path: data-science/environment/training/
24 |
25 | # Compute target for pipeline
26 | training_target: gpu-cluster
27 | training_target_sku: Standard_NC6
28 | training_target_min_nodes: 0
29 | training_target_max_nodes: 1
30 |
31 | # Name for the training pipeline
32 | training_pipeline_name: resnet-dogs-training-pipeline
33 | training_experiment_name: resnet-dogs-training
34 |
35 | # Training arguments specification
36 | training_arguments: --epochs 2 --batch-size 64 --training-mode feature-extraction
37 |
38 | # Training datasets specification
39 | # Syntax: <name>:<version>:<mount|download>:<step>[+<step>...]
40 | training_datasets: dogs-labels:1:download:prep dogs-imgs:latest:mount:train+eval
41 |
42 | # Name under which the model will be registered
43 | model_name: resnet-dogs-classifier
44 |
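A minimal sketch of how each space-separated entry in training_datasets decomposes, following the syntax comment above:

    spec = "dogs-labels:1:download:prep dogs-imgs:latest:mount:train+eval"
    for entry in spec.split():
        name, version, mode, steps = entry.split(":")
        print(name, version, mode, steps.split("+"))
    # dogs-labels 1 download ['prep']
    # dogs-imgs latest mount ['train', 'eval']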
--------------------------------------------------------------------------------
/cv/python-sdk-v1/data-science/environment/training/azureml_environment.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "pytorch_manual",
3 | "environmentVariables": {
4 | "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
5 | },
6 | "python": {
7 | "userManagedDependencies": false,
8 | "interpreterPath": "python",
9 | "condaDependenciesFile": null,
10 | "baseCondaEnvironment": null
11 | },
12 | "docker": {
13 | "enabled": true,
14 | "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04",
15 | "baseDockerfile": null,
16 | "sharedVolumes": true,
17 | "shmSize": "2g",
18 | "arguments": [],
19 | "baseImageRegistry": {
20 | "address": null,
21 | "username": null,
22 | "password": null,
23 | "registryIdentity": null
24 | },
25 | "platform": {
26 | "os": "Linux",
27 | "architecture": "amd64"
28 | }
29 | },
30 | "spark": {
31 | "repositories": [],
32 | "packages": [],
33 | "precachePackages": true
34 | },
35 | "databricks": {
36 | "mavenLibraries": [],
37 | "pypiLibraries": [],
38 | "rcranLibraries": [],
39 | "jarLibraries": [],
40 | "eggLibraries": []
41 | },
42 | "r": null,
43 | "inferencingStackVersion": null
44 | }
--------------------------------------------------------------------------------
/cv/python-sdk-v1/data-science/environment/training/conda_dependencies.yml:
--------------------------------------------------------------------------------
1 | name: pytorch_manual
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.7
6 | - pip=20.2.4
7 | - pip:
8 | - pandas==1.3.5
9 | - scikit-learn==1.0.2
10 | - matplotlib==3.5.2
11 | - msrest==0.6.21
12 | - mlflow==1.27.0
13 | - azureml-core==1.43.0
14 | - azureml-defaults==1.43.0
15 | - azureml-mlflow==1.43.0
16 | - torch==1.11.0
17 | - torchvision==0.12.0
18 |
--------------------------------------------------------------------------------
/cv/python-sdk-v1/data-science/notebooks/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/cv/python-sdk-v1/data-science/notebooks/.gitkeep
--------------------------------------------------------------------------------
/cv/python-sdk-v1/data-science/src/model/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | from .dataset import CustomImageDataset
5 | from .net import load_model
6 |
--------------------------------------------------------------------------------
/cv/python-sdk-v1/data-science/src/model/dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | import os
5 | import PIL
6 |
7 | from torch.utils.data import Dataset
8 | import torchvision.transforms as transforms
9 |
10 |
11 | class CustomImageDataset(Dataset):
12 | def __init__(self, img_dir, img_labels, mode='test'):
13 | self.img_dir = img_dir
14 | self.img_labels = img_labels
15 | self.classes = img_labels.unique().tolist()
16 |
17 | self.mode = mode
18 | if self.mode == 'train':
19 | self.transform = transforms.Compose([
20 | transforms.RandomResizedCrop(224),
21 | transforms.RandomHorizontalFlip(),
22 | transforms.ToTensor()
23 | ])
24 | else:
25 | self.transform = transforms.Compose([
26 | transforms.Resize(256),
27 | transforms.CenterCrop(224),
28 | transforms.ToTensor(),
29 | ])
30 |
31 | def __len__(self):
32 | return len(self.img_labels)
33 |
34 | def __getitem__(self, idx):
35 |
36 | img_path = self.img_labels.index[idx]
37 | image = PIL.Image.open(os.path.join(self.img_dir, img_path)).convert('RGB')
38 | image = self.transform(image)
39 |
40 | img_label = self.img_labels[idx]
41 | img_class = self.classes.index(img_label)
42 |
43 | return image, img_class, img_path
44 |
45 | def nclasses(self):
46 | return len(self.classes)
47 |
48 | def get_labels(self, indexes):
49 | return [self.classes[i] for i in indexes]
50 |
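A minimal usage sketch for CustomImageDataset: it expects img_labels as a pandas Series indexed by image path (integer positional access like img_labels[idx] relies on the pandas 1.x behavior pinned in conda_dependencies.yml). File names here are hypothetical, and the images must exist under img_dir for __getitem__ to succeed:

    import pandas as pd
    from torch.utils.data import DataLoader

    labels = pd.Series({"dog1.jpg": "chihuahua", "dog2.jpg": "poodle"})
    dataset = CustomImageDataset(img_dir="data/training-imgs", img_labels=labels, mode="train")
    loader = DataLoader(dataset, batch_size=2, shuffle=True)
    images, classes, paths = next(iter(loader))  # images: (2, 3, 224, 224)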
--------------------------------------------------------------------------------
/cv/python-sdk-v1/data-science/src/model/net.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torchvision.models as models
7 | import torch.optim as optim
8 |
9 |
10 | def load_model(path=None, num_classes=2, mode='finetuning', learning_rate=0.001, momentum=0.9):
11 |
12 | # Load existing model
13 | if path:
14 | print('Loading existing model from path...')
15 | model_data = torch.load(path)
16 | model = models.resnet18(pretrained=False)
17 | model.fc = nn.Linear(model.fc.in_features, model_data['fc.weight'].shape[0])
18 | model.load_state_dict(model_data)
19 | return model
20 |
21 | # Initialize new model
22 | assert mode in ['finetuning', 'feature-extraction']
23 |
24 | model = models.resnet18(pretrained=True)
25 | if mode == 'feature-extraction': # Freeze layers
26 | for param in model.parameters():
27 | param.requires_grad = False
28 |
29 | model.fc = nn.Linear(model.fc.in_features, num_classes)
30 |
31 | criterion = nn.CrossEntropyLoss()
32 |
33 |     params_optim = model.parameters() if mode == 'finetuning' else model.fc.parameters()
34 | optimizer = optim.SGD(params_optim, lr=learning_rate, momentum=momentum)
35 |
36 | return model, criterion, optimizer
37 |
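Note the asymmetric return: loading from a checkpoint yields just the model, while initializing a new one yields (model, criterion, optimizer). A minimal sketch (the checkpoint path is illustrative):

    # new model for feature extraction: frozen backbone, trainable 120-way head
    model, criterion, optimizer = load_model(num_classes=120, mode='feature-extraction')

    # reloading a saved state dict returns only the model
    # model = load_model(path='outputs/model.pt')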
--------------------------------------------------------------------------------
/cv/python-sdk-v1/data-science/src/prep.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | import os
5 | import argparse
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import mlflow
10 |
11 |
12 | def main(raw_data_path, prepared_data_path):
13 |
14 | print(f'Raw data path: {raw_data_path}')
15 | print(f'Output data path: {prepared_data_path}')
16 |
17 | # Read data
18 |
19 | labels_data = pd.read_csv(os.path.join(raw_data_path, 'image_labels.csv'))
20 |
21 | mlflow.log_metric('total_labels', len(labels_data))
22 |
23 | # Split data into train and test datasets
24 |
25 | random_data = np.random.rand(len(labels_data))
26 | labels_train = labels_data[random_data < 0.7]
27 | labels_test = labels_data[random_data >= 0.7]
28 |
29 | print(labels_train)
30 |
31 | mlflow.log_metric('train_size', labels_train.shape[0])
32 | mlflow.log_metric('test_size', labels_test.shape[0])
33 |
34 | labels_train.to_csv(os.path.join(prepared_data_path, 'labels_train.csv'), index=False)
35 | labels_test.to_csv(os.path.join(prepared_data_path, 'labels_test.csv'), index=False)
36 |
37 | print('Finished.')
38 |
39 |
40 | def parse_args(args_list=None):
41 | parser = argparse.ArgumentParser()
42 | parser.add_argument("--dogs-labels", type=str, required=True, help="Path to labels")
43 | parser.add_argument("--prepared_data_path", type=str, required=True, help="Path for prepared data")
44 |
45 | args_parsed, unknown = parser.parse_known_args(args_list)
46 | if unknown:
47 | print(f"Unrecognized arguments. These won't be used: {unknown}")
48 |
49 | return args_parsed
50 |
51 |
52 | if __name__ == "__main__":
53 | args = parse_args()
54 |
55 | main(
56 | raw_data_path=args.dogs_labels,
57 | prepared_data_path=args.prepared_data_path
58 | )
59 |
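The args_list hook in parse_args makes the script easy to unit test without touching sys.argv; a minimal sketch:

    args = parse_args([
        "--dogs-labels", "data/raw",        # argparse exposes this as args.dogs_labels
        "--prepared_data_path", "data/prepared",
    ])
    assert args.dogs_labels == "data/raw"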
--------------------------------------------------------------------------------
/cv/python-sdk-v1/data-science/tests/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/mlops-project-template/24112dcd1fb58b1e7b1952001f028a1bcaab9dcd/cv/python-sdk-v1/data-science/tests/.gitkeep
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: azureml-cli-v2
2 | channels:
3 | - conda-forge
4 | - defaults
5 | dependencies:
6 | - python==3.8
7 | - yapf
8 | - pylint
9 | - pip
10 | - pip:
11 | - mlflow
12 | - cloudpickle==1.6.0
13 | - scikit-learn==0.24.2
14 | - flask==1.1.2
15 | - applicationinsights
16 | - pandas
17 | - azureml-core
18 | - azureml-dataset-runtime[fuse]
19 | - opencensus-ext-azure
20 |
--------------------------------------------------------------------------------
/infrastructure/bicep/bicepconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "analyzers": {
3 | "core": {
4 | "enabled": true,
5 | "verbose": true,
6 | "rules": {
7 | "no-hardcoded-env-urls": {
8 | "level": "error"
9 | },
10 | "no-unused-params": {
11 | "level": "error"
12 | },
13 | "no-unused-vars": {
14 | "level": "error"
15 | },
16 | "prefer-interpolation": {
17 | "level": "error"
18 | },
19 | "secure-parameter-default": {
20 | "level": "error"
21 | },
22 | "simplify-interpolation": {
23 | "level": "error"
24 | }
25 | }
26 | }
27 | }
28 | }
--------------------------------------------------------------------------------
/infrastructure/bicep/main.bicep:
--------------------------------------------------------------------------------
1 | targetScope = 'subscription'
2 |
3 | param location string = 'westus2'
4 | param prefix string
5 | param postfix string
6 | param env string
7 |
8 | param tags object = {
9 | Owner: 'mlops-v2'
10 | Project: 'mlops-v2'
11 | Environment: env
12 | Toolkit: 'bicep'
13 | Name: prefix
14 | }
15 |
16 | var baseName = '${prefix}-${postfix}${env}'
17 | var resourceGroupName = 'rg-${baseName}'
18 |
19 | resource rg 'Microsoft.Resources/resourceGroups@2020-06-01' = {
20 | name: resourceGroupName
21 | location: location
22 |
23 | tags: tags
24 | }
25 |
26 | // Storage Account
27 | module st './modules/storage_account.bicep' = {
28 | name: 'st'
29 | scope: resourceGroup(rg.name)
30 | params: {
31 | baseName: '${uniqueString(rg.id)}${env}'
32 | location: location
33 | tags: tags
34 | }
35 | }
36 |
37 | // Key Vault
38 | module kv './modules/key_vault.bicep' = {
39 | name: 'kv'
40 | scope: resourceGroup(rg.name)
41 | params: {
42 | baseName: baseName
43 | location: location
44 | tags: tags
45 | }
46 | }
47 |
48 | // App Insights
49 | module appi './modules/application_insights.bicep' = {
50 | name: 'appi'
51 | scope: resourceGroup(rg.name)
52 | params: {
53 | baseName: baseName
54 | location: location
55 | tags: tags
56 | }
57 | }
58 |
59 | // Container Registry
60 | module cr './modules/container_registry.bicep' = {
61 | name: 'cr'
62 | scope: resourceGroup(rg.name)
63 | params: {
64 | baseName: '${uniqueString(rg.id)}${env}'
65 | location: location
66 | tags: tags
67 | }
68 | }
69 |
70 | // AML workspace
71 | module mlw './modules/aml_workspace.bicep' = {
72 | name: 'mlw'
73 | scope: resourceGroup(rg.name)
74 | params: {
75 | baseName: baseName
76 | location: location
77 | stoacctid: st.outputs.stoacctOut
78 | kvid: kv.outputs.kvOut
79 | appinsightid: appi.outputs.appinsightOut
80 | crid: cr.outputs.crOut
81 | tags: tags
82 | }
83 | }
84 |
85 | // AML compute cluster
86 | module mlwcc './modules/aml_computecluster.bicep' = {
87 | name: 'mlwcc'
88 | scope: resourceGroup(rg.name)
89 | params: {
90 | location: location
91 | workspaceName: mlw.outputs.amlsName
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/infrastructure/bicep/modules/aml_computecluster.bicep:
--------------------------------------------------------------------------------
1 | param location string
2 | param computeClusterName string = 'cpu-cluster'
3 | param workspaceName string
4 |
5 | resource amlci 'Microsoft.MachineLearningServices/workspaces/computes@2020-09-01-preview' = {
6 | name: '${workspaceName}/${computeClusterName}'
7 | location: location
8 | properties: {
9 | computeType: 'AmlCompute'
10 | properties: {
11 | vmSize: 'Standard_DS3_v2'
12 | subnet: json('null')
13 | osType: 'Linux'
14 | scaleSettings: {
15 | maxNodeCount: 4
16 | minNodeCount: 0
17 | }
18 | }
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/infrastructure/bicep/modules/aml_workspace.bicep:
--------------------------------------------------------------------------------
1 | param baseName string
2 | param location string
3 | param stoacctid string
4 | param kvid string
5 | param appinsightid string
6 | param crid string
7 | param tags object
8 |
9 | // AML workspace
10 | resource amls 'Microsoft.MachineLearningServices/workspaces@2020-09-01-preview' = {
11 | name: 'mlw-${baseName}'
12 | location: location
13 | identity: {
14 | type: 'SystemAssigned'
15 | }
16 | sku: {
17 | tier: 'basic'
18 | name: 'basic'
19 | }
20 | properties: {
21 | storageAccount: stoacctid
22 | keyVault: kvid
23 | applicationInsights: appinsightid
24 | containerRegistry: crid
25 | encryption: {
26 | status: 'Disabled'
27 | keyVaultProperties: {
28 | keyIdentifier: ''
29 | keyVaultArmId: ''
30 | }
31 | }
32 | }
33 |
34 | tags: tags
35 | }
36 |
37 | output amlsName string = amls.name
38 |
--------------------------------------------------------------------------------
/infrastructure/bicep/modules/application_insights.bicep:
--------------------------------------------------------------------------------
1 | param baseName string
2 | param location string
3 | param tags object
4 |
5 | // App Insights
6 | resource appinsight 'Microsoft.Insights/components@2020-02-02-preview' = {
7 | name: 'appi-${baseName}'
8 | location: location
9 | kind: 'web'
10 | properties: {
11 | Application_Type: 'web'
12 | }
13 |
14 | tags: tags
15 | }
16 |
17 | output appinsightOut string = appinsight.id
18 |
--------------------------------------------------------------------------------
/infrastructure/bicep/modules/container_registry.bicep:
--------------------------------------------------------------------------------
1 | param baseName string
2 | param location string
3 | param tags object
4 |
5 | resource cr 'Microsoft.ContainerRegistry/registries@2020-11-01-preview' = {
6 | name: 'cr${baseName}'
7 | location: location
8 | sku: {
9 | name: 'Standard'
10 | }
11 |
12 | properties: {
13 | adminUserEnabled: true
14 | }
15 |
16 | tags: tags
17 | }
18 |
19 | output crOut string = cr.id
20 |
--------------------------------------------------------------------------------
/infrastructure/bicep/modules/key_vault.bicep:
--------------------------------------------------------------------------------
1 | param baseName string
2 | param location string
3 | param tags object
4 |
5 | // Key Vault
6 | resource kv 'Microsoft.KeyVault/vaults@2019-09-01' = {
7 | name: 'kv-${baseName}'
8 | location: location
9 | properties: {
10 | tenantId: subscription().tenantId
11 | sku: {
12 | name: 'standard'
13 | family: 'A'
14 | }
15 | accessPolicies: []
16 | }
17 |
18 | tags: tags
19 | }
20 |
21 | output kvOut string = kv.id
22 |
--------------------------------------------------------------------------------
/infrastructure/bicep/modules/storage_account.bicep:
--------------------------------------------------------------------------------
1 | param baseName string
2 | param location string
3 | param tags object
4 |
5 | // Storage Account
6 | resource stoacct 'Microsoft.Storage/storageAccounts@2019-04-01' = {
7 | name: 'st${baseName}'
8 | location: location
9 | sku: {
10 | name: 'Standard_LRS'
11 | }
12 | kind: 'StorageV2'
13 | properties: {
14 | encryption: {
15 | services: {
16 | blob: {
17 | enabled: true
18 | }
19 | file: {
20 | enabled: true
21 | }
22 | }
23 | keySource: 'Microsoft.Storage'
24 | }
25 | supportsHttpsTrafficOnly: true
26 | }
27 |
28 | tags: tags
29 | }
30 |
31 | output stoacctOut string = stoacct.id
32 |
--------------------------------------------------------------------------------
/infrastructure/bicep/pipelines/bicep-ado-deploy-infra.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: bicep-ado-deploy-infra
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 |
14 | trigger: none
15 |
16 | pool:
17 | vmImage: $(ap_vm_image)
18 |
19 | stages:
20 | - stage: Lint
21 | displayName: Lint and Preflight check
22 | jobs:
23 | - job: LintBicep
24 | displayName: Lint Bicep Code
25 | steps:
26 | - checkout: self
27 | - script: |
28 |           az bicep build --file ./infrastructure/bicep/main.bicep
29 | name: LintBicepCode
30 | displayName: Run Bicep Linter
31 |
32 | - stage: PreflightValidation
33 | jobs:
34 | - job: ValidateBicepCode
35 | displayName: Validate Bicep Code
36 | steps:
37 | - task: AzureCli@2
38 |           name: RunPreflightValidation
39 | displayName: Run Preflight Validation
40 | inputs:
41 | azureSubscription: $(ado_service_connection_rg)
42 | scriptType: "bash"
43 | scriptLocation: "inlineScript"
44 | inlineScript: |
45 | az deployment sub validate \
46 | --name $(Build.DefinitionName) \
47 |               --template-file ./infrastructure/bicep/main.bicep \
48 | --location $(location) \
49 | --parameters location=$(location) prefix=$(namespace) postfix=$(postfix) env=$(environment)
50 |
51 | - stage: CheckOutBicepAndDeploy
52 | displayName: Deploy AML Workspace
53 | jobs:
54 | - deployment: DevDeployBicep
55 | displayName: Deploy Bicep
56 | pool:
57 | vmImage: $(ap_vm_image)
58 | environment: $(environment)
59 | strategy:
60 | runOnce:
61 | deploy:
62 | steps:
63 | - checkout: self
64 | - task: AzureCLI@2
65 | displayName: Running ${{ variables.environment }} Deployment
66 | inputs:
67 | azureSubscription: $(ado_service_connection_rg)
68 | scriptType: bash
69 | scriptLocation: inlineScript
70 | inlineScript: |
71 | az --version
72 | echo "deploying bicep..."
73 | az deployment sub create \
74 | --name $(Build.DefinitionName) \
75 | --location $(location) \
76 |                       --template-file ./infrastructure/bicep/main.bicep \
77 | --parameters location=$(location) prefix=$(namespace) postfix=$(postfix) env=$(environment)
78 |
--------------------------------------------------------------------------------
/infrastructure/terraform/aml_deploy.tf:
--------------------------------------------------------------------------------
1 | # Resource group
2 |
3 | module "resource_group" {
4 | source = "./modules/resource-group"
5 |
6 | location = var.location
7 |
8 | prefix = var.prefix
9 | postfix = var.postfix
10 | env = var.environment
11 |
12 | tags = local.tags
13 | }
14 |
15 | # Azure Machine Learning workspace
16 |
17 | module "aml_workspace" {
18 | source = "./modules/aml-workspace"
19 |
20 | rg_name = module.resource_group.name
21 | location = module.resource_group.location
22 |
23 | prefix = var.prefix
24 | postfix = var.postfix
25 | env = var.environment
26 |
27 | storage_account_id = module.storage_account_aml.id
28 | key_vault_id = module.key_vault.id
29 | application_insights_id = module.application_insights.id
30 | container_registry_id = module.container_registry.id
31 |
32 | enable_aml_computecluster = var.enable_aml_computecluster
33 | storage_account_name = module.storage_account_aml.name
34 |
35 | tags = local.tags
36 | }
37 |
38 | # Storage account
39 |
40 | module "storage_account_aml" {
41 | source = "./modules/storage-account"
42 |
43 | rg_name = module.resource_group.name
44 | location = module.resource_group.location
45 |
46 | prefix = var.prefix
47 | postfix = var.postfix
48 | env = var.environment
49 |
50 | hns_enabled = false
51 | firewall_bypass = ["AzureServices"]
52 | firewall_virtual_network_subnet_ids = []
53 |
54 | tags = local.tags
55 | }
56 |
57 | # Key vault
58 |
59 | module "key_vault" {
60 | source = "./modules/key-vault"
61 |
62 | rg_name = module.resource_group.name
63 | location = module.resource_group.location
64 |
65 | prefix = var.prefix
66 | postfix = var.postfix
67 | env = var.environment
68 |
69 | tags = local.tags
70 | }
71 |
72 | # Application insights
73 |
74 | module "application_insights" {
75 | source = "./modules/application-insights"
76 |
77 | rg_name = module.resource_group.name
78 | location = module.resource_group.location
79 |
80 | prefix = var.prefix
81 | postfix = var.postfix
82 | env = var.environment
83 |
84 | tags = local.tags
85 | }
86 |
87 | # Container registry
88 |
89 | module "container_registry" {
90 | source = "./modules/container-registry"
91 |
92 | rg_name = module.resource_group.name
93 | location = module.resource_group.location
94 |
95 | prefix = var.prefix
96 | postfix = var.postfix
97 | env = var.environment
98 |
99 | tags = local.tags
100 | }
101 |
102 | module "data_explorer" {
103 | source = "./modules/data-explorer"
104 |
105 | rg_name = module.resource_group.name
106 | location = module.resource_group.location
107 |
108 | prefix = var.prefix
109 | postfix = var.postfix
110 | env = var.environment
111 | key_vault_id = module.key_vault.id
112 | enable_monitoring = var.enable_monitoring
113 |
114 | client_secret = var.client_secret
115 |
116 | tags = local.tags
117 | }
118 |
--------------------------------------------------------------------------------
/infrastructure/terraform/devops-pipelines/tf-ado-deploy-infra.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: tf-ado-deploy-infra
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 |
14 | trigger: none
15 |
16 | pool:
17 | vmImage: ubuntu-20.04
18 |
19 | resources:
20 | repositories:
21 | - repository: mlops-templates # Template Repo
22 | name: mlops-templates
23 | type: git
24 | ref: main
25 |
26 | stages:
27 | - stage: CreateStorageAccountForTerraformState
28 | displayName: Create Storage for Terraform
29 | jobs:
30 | - job: CreateStorageForTerraform
31 | displayName: Create Storage for Terraform
32 | steps:
33 | - checkout: self
34 | path: s/
35 | - task: Bash@3
36 | displayName: "Create checkout repository folder(s)"
37 | inputs:
38 | targetType: "inline"
39 | script: |
40 | set -e
41 | mkdir "$(Build.Repository.Name)"
42 | mkdir "mlops-templates"
43 | - checkout: mlops-templates
44 | path: s/templates/
45 | - template: templates/infra/create-resource-group.yml@mlops-templates
46 | - template: templates/infra/create-storage-account.yml@mlops-templates
47 | - template: templates/infra/create-storage-container.yml@mlops-templates
48 | - stage: DeployAzureMachineLearningRG
49 | displayName: Deploy AML Workspace
50 | jobs:
51 | - job: DeployAMLWorkspace
52 | displayName: Deploy Terraform
53 | steps:
54 | - checkout: self
55 | path: s/
56 | - task: Bash@3
57 | displayName: "Create checkout repository folder(s)"
58 | inputs:
59 | targetType: "inline"
60 | script: |
61 | set -e
62 | mkdir "$(Build.Repository.Name)"
63 | mkdir "mlops-templates"
64 | - checkout: mlops-templates
65 | path: s/templates/
66 | - template: templates/infra/create-sp-variables.yml@mlops-templates
67 | - template: templates/infra/install-terraform.yml@mlops-templates
68 | - template: templates/infra/run-terraform-init.yml@mlops-templates
69 | - template: templates/infra/run-terraform-validate.yml@mlops-templates
70 | - template: templates/infra/run-terraform-plan.yml@mlops-templates
71 | - template: templates/infra/run-terraform-apply.yml@mlops-templates
72 |
--------------------------------------------------------------------------------
/infrastructure/terraform/github-actions/tf-gha-deploy-infra.yml:
--------------------------------------------------------------------------------
1 | name: tf-gha-deploy-infra.yml
2 |
3 | on:
4 | #push:
5 | workflow_dispatch:
6 | env:
7 | config_env: "none"
8 | jobs:
9 | set-env-branch:
10 | runs-on: ubuntu-latest
11 | outputs:
12 | config-file: ${{ steps.set-output-defaults.outputs.config-file }}
13 | steps:
14 | - id: set-prod-branch
15 | name: set-prod-branch
16 | if: ${{ github.ref == 'refs/heads/main'}}
17 | run: echo "config_env=config-infra-prod.yml" >> $GITHUB_ENV;
18 | - id: set-dev-branch
20 |         name: set-dev-branch
20 | if: ${{ github.ref != 'refs/heads/main'}}
21 | run: echo "config_env=config-infra-dev.yml" >> $GITHUB_ENV;
22 | - id: set-output-defaults
23 | name: set-output-defaults
24 | run: |
25 | echo "config-file=$config_env" >> $GITHUB_OUTPUT;
26 | get-config:
27 | needs: set-env-branch
28 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main
29 | with:
30 | file_name: ${{ needs.set-env-branch.outputs.config-file}}
31 | test-terraform-state-deployment:
32 | needs: [get-config, set-env-branch]
33 | uses: Azure/mlops-templates/.github/workflows/tf-gha-install-terraform.yml@main
34 | with:
35 | TFAction: "apply"
36 | dply_environment: ${{ needs.set-env-branch.outputs.config-file }}
37 | location: ${{ needs.get-config.outputs.location }}
38 | namespace: ${{ needs.get-config.outputs.namespace }}
39 | postfix: ${{ needs.get-config.outputs.postfix }}
40 | environment: ${{ needs.get-config.outputs.environment }}
41 | enable_aml_computecluster: ${{ needs.get-config.outputs.enable_aml_computecluster == true }} ## TODO review the evaluation of boolean
42 | enable_monitoring: ${{ needs.get-config.outputs.enable_monitoring == true }} ## TODO review the evaluation of boolean
43 | terraform_version: ${{ needs.get-config.outputs.terraform_version }}
44 | terraform_workingdir: ${{ needs.get-config.outputs.terraform_workingdir }}
45 | terraform_st_location: ${{ needs.get-config.outputs.terraform_st_location }}
46 | terraform_st_storage_account: ${{ needs.get-config.outputs.terraform_st_storage_account }}
47 | terraform_st_resource_group: ${{ needs.get-config.outputs.terraform_st_resource_group }}
48 | terraform_st_container_name: ${{ needs.get-config.outputs.terraform_st_container_name }}
49 | terraform_st_key: ${{ needs.get-config.outputs.terraform_st_key }}
50 | terraform_plan_location: ${{ needs.get-config.outputs.location }}
51 | terraform_plan_vnet: "TBD" # TBD
52 | secrets:
53 | azure_creds: ${{ secrets.AZURE_CREDENTIALS }}
54 | clientId: ${{ secrets.ARM_CLIENT_ID }}
55 | clientSecret: ${{ secrets.ARM_CLIENT_SECRET }}
56 | subscriptionId: ${{ secrets.ARM_SUBSCRIPTION_ID }}
57 | tenantId: ${{ secrets.ARM_TENANT_ID }}
58 | deploy-azureml-resources:
59 | runs-on: ubuntu-latest
60 | steps:
61 | - id: deploy-aml-workspace
62 | name: deploy-aml-workspace
63 | run: echo "OK"
64 |
--------------------------------------------------------------------------------
/infrastructure/terraform/locals.tf:
--------------------------------------------------------------------------------
1 | locals {
2 | tags = {
3 | Owner = "mlops-v2"
4 | Project = "mlops-v2"
 5 |     Environment = var.environment
6 | Toolkit = "terraform"
 7 |     Name        = var.prefix
8 | }
9 | }
--------------------------------------------------------------------------------
/infrastructure/terraform/main.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | backend "azurerm" {}
3 | required_providers {
4 | azurerm = {
5 | version = "= 2.99.0"
6 | }
7 | }
8 | }
9 |
10 | provider "azurerm" {
11 | features {}
12 | }
13 |
14 | data "azurerm_client_config" "current" {}
15 |
16 | data "http" "ip" {
17 | url = "https://ifconfig.me"
18 | }
--------------------------------------------------------------------------------
/infrastructure/terraform/modules/aml-workspace/main.tf:
--------------------------------------------------------------------------------
1 | resource "azurerm_machine_learning_workspace" "mlw" {
2 | name = "mlw-${var.prefix}-${var.postfix}${var.env}"
3 | location = var.location
4 | resource_group_name = var.rg_name
5 | application_insights_id = var.application_insights_id
6 | key_vault_id = var.key_vault_id
7 | storage_account_id = var.storage_account_id
8 | container_registry_id = var.container_registry_id
9 |
10 | sku_name = "Basic"
11 |
12 | identity {
13 | type = "SystemAssigned"
14 | }
15 |
16 | tags = var.tags
17 | }
18 |
19 | # Compute cluster
20 |
21 | resource "azurerm_machine_learning_compute_cluster" "adl_aml_ws_compute_cluster" {
22 | name = "cpu-cluster"
23 | location = var.location
24 | vm_priority = "LowPriority"
25 | vm_size = "Standard_DS3_v2"
26 | machine_learning_workspace_id = azurerm_machine_learning_workspace.mlw.id
27 | count = var.enable_aml_computecluster ? 1 : 0
28 |
29 | scale_settings {
30 | min_node_count = 0
31 | max_node_count = 4
32 | scale_down_nodes_after_idle_duration = "PT120S" # 120 seconds
33 | }
34 | }
35 |
36 | # # Datastore
37 |
38 | # resource "azurerm_resource_group_template_deployment" "arm_aml_create_datastore" {
39 | # name = "arm_aml_create_datastore"
40 | # resource_group_name = var.rg_name
41 | # deployment_mode = "Incremental"
42 | # parameters_content = jsonencode({
43 | # "WorkspaceName" = {
44 | # value = azurerm_machine_learning_workspace.mlw.name
45 | # },
46 | # "StorageAccountName" = {
47 | # value = var.storage_account_name
48 | # }
49 | # })
50 |
51 | # depends_on = [time_sleep.wait_30_seconds]
52 |
53 | # template_content = <
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/data-science/src/summarization/compare.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import logging
4 | import mlflow
5 | import json
6 | from distutils.util import strtobool
7 |
8 | def main():
9 | """Main function of the script."""
10 | # initialize root logger
11 | logger = logging.getLogger()
12 | logger.setLevel(logging.INFO)
13 | console_handler = logging.StreamHandler()
14 | formatter = logging.Formatter(
15 | "%(asctime)s : %(levelname)s : %(name)s : %(message)s"
16 | )
17 | console_handler.setFormatter(formatter)
18 | logger.addHandler(console_handler)
19 |
20 | # input and output arguments
21 | parser = argparse.ArgumentParser()
22 | parser.add_argument(
23 | "--baseline_metrics",
24 | type=str,
25 | required=True,
26 | help="path to baseline metrics folder containing all_results.json",
27 | )
28 | parser.add_argument(
29 | "--candidate_metrics",
30 | type=str,
31 | required=True,
32 | help="path to candidate metrics folder containing all_results.json",
33 | )
34 | parser.add_argument(
35 | "--reference_metric",
36 | type=str,
37 | default="predict_rougeLsum",
38 | help="name of reference metric for shipping flag (default: predict_rougeLsum)",
39 | )
40 | parser.add_argument(
41 | "--force_comparison", type=strtobool, default=False, help="set to True to bypass comparison and set --deploy_flag to True"
42 | )
43 | parser.add_argument(
44 | "--deploy_flag", type=str, help="output folder where the deploy flag file will be written"
45 | )
46 |
47 | args = parser.parse_args()
48 |
49 | # Start Logging
50 | mlflow.start_run()
51 |
52 | logger.info(f"Running with arguments: {args}")
53 |
54 | # open metrics on both sides
55 | with open(os.path.join(args.baseline_metrics, "all_results.json")) as in_file:
56 | baseline_metrics = json.loads(in_file.read())
57 | with open(os.path.join(args.candidate_metrics, "all_results.json")) as in_file:
58 | candidate_metrics = json.loads(in_file.read())
59 |
60 | # should we ship or not?
61 | if args.force_comparison:
62 | deploy_flag = True
63 | else:
64 | deploy_flag = (
65 | candidate_metrics[args.reference_metric]
66 | > baseline_metrics[args.reference_metric]
67 | )
68 |
69 | logger.info("baseline_metrics[{}]={}, candidate_metrics[{}]={}, deploy_flag={} (force_comparison={})".format(
70 | args.reference_metric,
71 | baseline_metrics[args.reference_metric],
72 | args.reference_metric,
73 | candidate_metrics[args.reference_metric],
74 | deploy_flag,
75 | args.force_comparison
76 | ))
77 |
78 | # save deploy_flag as a file
79 | os.makedirs(args.deploy_flag, exist_ok=True)
80 | with open(os.path.join(args.deploy_flag, "deploy_flag"), "w") as out_file:
81 | out_file.write("%d" % int(deploy_flag))
82 |
83 | # Stop Logging
84 | mlflow.end_run()
85 |
86 |
87 | if __name__ == "__main__":
88 | main()
89 |
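As a quick sanity check, the comparison logic above can be exercised locally with synthetic metrics. A minimal sketch (hypothetical paths, not part of the repo; assumes mlflow is installed so mlflow.start_run() can log to a local mlruns/ folder, and that it is run from the folder containing compare.py):

import json
import subprocess
import sys
import tempfile
from pathlib import Path

tmp = Path(tempfile.mkdtemp())
for name, rouge in [("baseline", 21.3), ("candidate", 23.9)]:
    (tmp / name).mkdir()
    (tmp / name / "all_results.json").write_text(json.dumps({"predict_rougeLsum": rouge}))

subprocess.run(
    [
        sys.executable, "compare.py",
        "--baseline_metrics", str(tmp / "baseline"),
        "--candidate_metrics", str(tmp / "candidate"),
        "--deploy_flag", str(tmp / "deploy_flag_out"),
    ],
    check=True,
)

# candidate ROUGE beats baseline, so the flag file should contain "1"
print((tmp / "deploy_flag_out" / "deploy_flag").read_text())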
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/data-science/src/summarization/register.py:
--------------------------------------------------------------------------------
1 | from azureml.core import Run
2 | from azureml.core.model import Model
3 |
4 | import os
5 | import argparse
6 | import logging
7 | import mlflow
8 |
9 |
10 | def main():
11 | """Main function of the script."""
12 | # initialize root logger
13 | logger = logging.getLogger()
14 | logger.setLevel(logging.INFO)
15 | console_handler = logging.StreamHandler()
16 | formatter = logging.Formatter(
17 | "%(asctime)s : %(levelname)s : %(name)s : %(message)s"
18 | )
19 | console_handler.setFormatter(formatter)
20 | logger.addHandler(console_handler)
21 |
22 | # input and output arguments
23 | parser = argparse.ArgumentParser()
24 | parser.add_argument(
25 | "--model_folder",
26 | type=str,
27 | required=True,
28 | help="folder containing model",
29 | )
30 | parser.add_argument(
31 | "--register_as",
32 | type=str,
33 | required=True,
34 | help="name to use for model registration in AzureML",
35 | )
36 | parser.add_argument(
37 | "--deploy_flag", type=str, required=True, help="a deploy flag whether to deploy or not"
38 | )
39 |
40 | args = parser.parse_args()
41 | logger.info(f"Running with arguments: {args}")
42 |
43 | # Start Logging
44 | mlflow.start_run()
45 |
46 | if os.path.isfile(args.deploy_flag):
47 | deploy_flag_file_path = args.deploy_flag
48 | else:
49 | deploy_flag_file_path = os.path.join(args.deploy_flag, "deploy_flag")
50 |
51 | logger.info(f"Opening deploy_flag file from {deploy_flag_file_path}")
52 | with open(deploy_flag_file_path, 'rb') as in_file:
53 | deploy_flag = bool(int(in_file.read()))
54 |
55 | if deploy_flag:
56 | logger.info(f"Deploy flag is True, registering model as {args.register_as}...")
57 | run = Run.get_context()
58 |
59 | # if we're running locally, raise
60 | if run.__class__.__name__ == "_OfflineRun":
61 | raise Exception("You can't run this script locally; run it as an AzureML job.")
62 |
63 | _ = Model.register(
64 | run.experiment.workspace,
65 | model_name=args.register_as,
66 | model_path=args.model_folder,
67 | tags={
68 | "type": "huggingface",
69 | "task": "summarization"
70 | },
71 | description="Huggingface model finetuned for summarization",
72 | )
73 | else:
74 | logger.info(f"Deploy flag is False, pass.")
75 |
76 | # Stop Logging
77 | mlflow.end_run()
78 |
79 |
80 | if __name__ == "__main__":
81 | main()
82 |
83 |
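Note that the handoff between compare.py and register.py is a one-character file: compare.py writes "0" or "1" into <output-folder>/deploy_flag, and register.py accepts either that file directly or its parent folder. A small illustration of the contract (hypothetical paths):

import os

# what compare.py does on its --deploy_flag output folder
flag_dir = "outputs/deploy_flag_out"
os.makedirs(flag_dir, exist_ok=True)
with open(os.path.join(flag_dir, "deploy_flag"), "w") as out_file:
    out_file.write("%d" % int(True))  # writes "1"

# what register.py does on its --deploy_flag input (file or folder)
path = flag_dir
if not os.path.isfile(path):
    path = os.path.join(path, "deploy_flag")
with open(path, "rb") as in_file:
    assert bool(int(in_file.read())) is True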
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/data-science/src/summarization/score.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import json
4 | from transformers import (
5 | AutoModelForSeq2SeqLM,
6 | AutoTokenizer,
7 | )
8 |
9 | def init():
10 | """
11 | This function is called when the container is initialized/started, typically after create/update of the deployment.
12 | You can write the logic here to perform init operations like caching the model in memory
13 | """
14 | global model, tokenizer
15 | # AZUREML_MODEL_DIR is an environment variable created during deployment.
16 | # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
17 | model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), os.listdir(os.getenv("AZUREML_MODEL_DIR"))[0])
18 | print("model_path")
19 | print(os.listdir(model_path))
20 | model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
21 | tokenizer = AutoTokenizer.from_pretrained(model_path)
22 | print("Init complete")
23 |
24 |
25 | def run(raw_data):
26 | """
27 | This function is called for every invocation of the endpoint to perform the actual scoring/prediction.
28 | In this example we extract the article from the JSON input, call the summarization model's
29 | generate() method, and return the decoded summary.
30 | """
31 | global model, tokenizer
32 | logging.info("Request received")
33 | article = json.loads(raw_data)["data"]
34 | if "t5" in model.config.architectures[0].lower():
35 | article= "summarize:" + article
36 |
37 | inputs = tokenizer(article, return_tensors="pt", max_length=512, truncation=True)
38 | outputs = model.generate(
39 | inputs["input_ids"], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True
40 | )
41 | result = tokenizer.decode(outputs[0], skip_special_tokens=True)
42 | print(result)
43 | logging.info("Request processed")
44 | return result
45 |
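To exercise init() and run() outside AzureML, one option is to point AZUREML_MODEL_DIR at a local folder that mimics the deployed layout, i.e. a folder containing a single versioned subfolder with the model files. A minimal sketch (hypothetical paths; assumes transformers is installed and a seq2seq model has been saved under ./models/summarizer/1/):

import json
import os

# mimic the AzureML layout: $AZUREML_MODEL_DIR/<version-subfolder>/<model files>
os.environ["AZUREML_MODEL_DIR"] = "./models/summarizer"

import score  # the scoring module above, importable from the current folder

score.init()
request = json.dumps({"data": "Long article text to be summarized ..."})
print(score.run(request))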
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/mlops/azureml/deploy/online/online-deployment.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
2 |
3 | # reference to the endpoint
4 | name: green
5 | endpoint_name: nlp-summarize-online
6 |
7 | # compute
8 | instance_type: STANDARD_DS5_V2
9 | instance_count: 1
10 |
11 | # model
12 | model: azureml:pubmed-summarization@latest
13 |
14 | # scoring code
15 | code_configuration:
16 | code: ../../../../data-science/src/summarization/
17 | scoring_script: score.py
18 |
19 | # custom scoring environment
20 | environment:
21 | conda_file: ../../../../data-science/environments/inference/conda_env.yml
22 | image: mcr.microsoft.com/azureml/minimal-ubuntu18.04-py37-cpu-inference:latest
23 |
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/mlops/azureml/deploy/online/online-endpoint.yml:
--------------------------------------------------------------------------------
1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
2 | name: nlp-summarize-online
3 | description: summarization model
4 | auth_mode: key
5 |
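For reference, the two YAML files above can also be applied from Python with the SDK v2 (azure-ai-ml, which this repo pins elsewhere). A minimal sketch; the subscription, resource group, and workspace names are placeholders:

from azure.ai.ml import MLClient, load_online_deployment, load_online_endpoint
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<subscription-id>",
    resource_group_name="<resource-group>",
    workspace_name="<workspace-name>",
)

# create/update the endpoint, then the deployment defined above
endpoint = load_online_endpoint("online-endpoint.yml")
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

deployment = load_online_deployment("online-deployment.yml")
ml_client.online_deployments.begin_create_or_update(deployment).result()

# route all traffic to the new deployment
endpoint.traffic = {deployment.name: 100}
ml_client.online_endpoints.begin_create_or_update(endpoint).result()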
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/mlops/azureml/train/train-env.yml:
--------------------------------------------------------------------------------
1 |
2 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
3 | name: nlp_summarization_train
4 | version: mlopsv2-july2022
5 | build:
6 | path: ../../../data-science/environments/training/
7 |
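The same environment can be registered from Python with the SDK v2 loader; a short sketch under the same placeholder-workspace assumptions as the endpoint example earlier:

from azure.ai.ml import MLClient, load_environment
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<subscription-id>",
    resource_group_name="<resource-group>",
    workspace_name="<workspace-name>",
)

# load the CLI v2 environment YAML above and register it in the workspace
env = load_environment("train-env.yml")
ml_client.environments.create_or_update(env)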
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-model-training-pipeline
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 | - name: version
14 | value: aml-cli-v2
15 |
16 | trigger: none
17 |
18 | pool:
19 | vmImage: ubuntu-20.04
20 |
21 | resources:
22 | repositories:
23 | - repository: mlops-templates # Template Repo
24 | name: mlops-templates
25 | type: git
26 | ref: main
27 |
28 | stages:
29 | - stage: DeployTrainingPipeline
30 | displayName: Deploy Training Pipeline
31 | jobs:
32 | - job: DeployTrainingPipeline
33 | steps:
34 | - checkout: self
35 | path: s/
36 | - task: Bash@3
37 | displayName: "Create checkout repository folder(s)"
38 | inputs:
39 | targetType: "inline"
40 | script: |
41 | set -e
42 | mkdir "$(Build.Repository.Name)"
43 | mkdir "mlops-templates"
44 | - checkout: mlops-templates
45 | path: s/templates/
46 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
47 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
48 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
49 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
50 | parameters:
51 | cluster_name: cpu-cluster
52 | size: STANDARD_DS3_V2
53 | min_instances: 0
54 | max_instances: 1
55 | cluster_tier: dedicated
56 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
57 | parameters:
58 | cluster_name: cpu-cluster-lg
59 | size: Standard_D14_v2
60 | min_instances: 0
61 | max_instances: 1
62 | cluster_tier: dedicated
63 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
64 | parameters:
65 | cluster_name: gpu-cluster
66 | size: Standard_NV6
67 | min_instances: 0
68 | max_instances: 1
69 | cluster_tier: dedicated
70 | - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
71 | parameters:
72 | environment_name: nlp_summarization_train
73 | environment_file: mlops/azureml/train/train-env.yml
74 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
75 | parameters:
76 | pipeline_file: mlops/azureml/train/pipeline.yml
77 | experiment_name: $(environment)_nlp_summarization_$(Build.SourceBranchName)
78 | display_name: $(environment)_nlp_summarization_$(Build.BuildID)
79 |
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | variables:
5 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
6 | # 'main' branch: PRD environment
7 | - template: ../../config-infra-prod.yml
8 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
9 | # 'develop' or feature branches: DEV environment
10 | - template: ../../config-infra-dev.yml
11 | - name: version
12 | value: aml-cli-v2
13 | - name: endpoint_name
14 | value: nlp-online-$(namespace)$(postfix)$(environment)
15 | - name: endpoint_type
16 | value: online
17 |
18 | trigger: none
19 |
20 | pool:
21 | vmImage: ubuntu-20.04
22 |
23 | resources:
24 | repositories:
25 | - repository: mlops-templates # Template Repo
26 | name: mlops-templates
27 | type: git
28 | ref: main
29 |
30 | stages:
31 | - stage: CreateOnlineEndpoint
32 | displayName: Create/Update Online Endpoint
33 | jobs:
34 | - job: DeployOnlineEndpoint
35 | steps:
36 | - checkout: self
37 | path: s/
38 | - task: Bash@3
39 | displayName: "Create checkout repository folder(s)"
40 | inputs:
41 | targetType: "inline"
42 | script: |
43 | set -e
44 | mkdir "$(Build.Repository.Name)"
45 | mkdir "mlops-templates"
46 | - checkout: mlops-templates
47 | path: s/templates/
48 | - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
49 | - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
50 | - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
51 | - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
52 | parameters:
53 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
54 | - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
55 | parameters:
56 | deployment_name: nlp-summarization-online-dp
57 | deployment_file: mlops/azureml/deploy/online/online-deployment.yml
58 | - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates
59 | parameters:
60 | traffic_allocation: nlp-summarization-online-dp=100
61 | - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
62 | parameters:
63 | deployment_name: nlp-summarization-online-dp
64 | sample_request: data/nlp-summarization-request.json
65 | request_type: json
66 |
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/mlops/github-actions/deploy-model-training-pipeline.yml:
--------------------------------------------------------------------------------
1 | name: deploy-model-training-pipeline
2 |
3 | on:
4 | workflow_dispatch:
5 | jobs:
6 | get-config:
7 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main
8 | with:
9 | file_name: config-infra-prod.yml
10 | create-compute-standard:
11 | needs: get-config
12 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main
13 | with:
14 | cluster_name: cpu-cluster
15 | size: STANDARD_DS3_V2
16 | min_instances: 0
17 | max_instances: 1
18 | resource_group: ${{ needs.get-config.outputs.resource_group }}
19 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
20 | secrets:
21 | creds: ${{secrets.AZURE_CREDENTIALS}}
22 | create-compute-large:
23 | needs: get-config
24 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main
25 | with:
26 | cluster_name: cpu-cluster-lg
27 | size: Standard_D14_v2
28 | min_instances: 0
29 | max_instances: 1
30 | resource_group: ${{ needs.get-config.outputs.resource_group }}
31 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
32 | secrets:
33 | creds: ${{secrets.AZURE_CREDENTIALS}}
34 | create-compute-gpu:
35 | needs: get-config
36 | uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main
37 | with:
38 | cluster_name: gpu-cluster
39 | size: Standard_NV6
40 | cluster_tier: low_priority
41 | min_instances: 0
42 | max_instances: 1
43 | resource_group: ${{ needs.get-config.outputs.resource_group }}
44 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
45 | secrets:
46 | creds: ${{secrets.AZURE_CREDENTIALS}}
47 | register-environment:
48 | needs: [get-config,create-compute-standard,create-compute-large,create-compute-gpu]
49 | uses: Azure/mlops-templates/.github/workflows/register-environment.yml@main
50 | with:
51 | resource_group: ${{ needs.get-config.outputs.resource_group }}
52 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
53 | environment_file: mlops/azureml/train/train-env.yml
54 | secrets:
55 | creds: ${{secrets.AZURE_CREDENTIALS}}
56 | run-pipeline:
57 | needs: [get-config,create-compute-standard,create-compute-large,create-compute-gpu,register-environment]
58 | uses: Azure/mlops-templates/.github/workflows/run-pipeline.yml@main
59 | with:
60 | resource_group: ${{ needs.get-config.outputs.resource_group }}
61 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
62 | parameters-file: mlops/azureml/train/pipeline.yml
63 | job-name: nlp_summarization
64 | secrets:
65 | creds: ${{secrets.AZURE_CREDENTIALS}}
66 |
--------------------------------------------------------------------------------
/nlp/aml-cli-v2/mlops/github-actions/deploy-online-endpoint-pipeline.yml:
--------------------------------------------------------------------------------
1 | name: deploy-online-endpoint-pipeline
2 |
3 | on:
4 | workflow_dispatch:
5 | jobs:
6 | get-config:
7 | uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main
8 | with:
9 | file_name: config-infra-prod.yml
10 | create-endpoint:
11 | needs: get-config
12 | uses: Azure/mlops-templates/.github/workflows/create-endpoint.yml@main
13 | with:
14 | resource_group: ${{ needs.get-config.outputs.resource_group }}
15 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
16 | endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
17 | endpoint_name: nlp-summarization-online-dp
18 | endpoint_type: online
19 | secrets:
20 | creds: ${{secrets.AZURE_CREDENTIALS}}
21 | create-deployment:
22 | uses: Azure/mlops-templates/.github/workflows/create-deployment.yml@main
23 | needs: [get-config,create-endpoint]
24 | with:
25 | resource_group: ${{ needs.get-config.outputs.resource_group }}
26 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
27 | endpoint_file: mlops/azureml/deploy/online/online-deployment.yml
28 | endpoint_name: nlp-summarization-online-dp
29 | endpoint_type: online
30 | deployment_name: nlp-summarization-online-dp
31 | secrets:
32 | creds: ${{secrets.AZURE_CREDENTIALS}}
33 | allocate-traffic:
34 | uses: Azure/mlops-templates/.github/workflows/allocate-traffic.yml@main
35 | needs: [get-config,create-deployment]
36 | with:
37 | resource_group: ${{ needs.get-config.outputs.resource_group }}
38 | workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
39 | traffic_allocation: nlp-summarization-online-dp=100
40 | endpoint_name: nlp-summarization-online-dp
41 | secrets:
42 | creds: ${{secrets.AZURE_CREDENTIALS}}
43 |
--------------------------------------------------------------------------------
/nlp/python-sdk-v2/data-science/environments/inference/conda_env.yml:
--------------------------------------------------------------------------------
1 | name: nlp_inference_conda_env
2 | channels:
3 | - pytorch
4 | - anaconda
5 | - defaults
6 | - conda-forge
7 | dependencies:
8 | - python=3.8
9 | - pip=21.2.4
10 | - pytorch=1.10.0
11 | - torchvision=0.11.1
12 | - torchaudio=0.10.0
13 | - cudatoolkit=11.1.1
14 | - nvidia-apex=0.1.0
15 | - gxx_linux-64=8.5.0
16 | - pip:
17 | - azureml-defaults==1.39.0
18 | - azureml-mlflow==1.39.0
19 | - azureml-telemetry==1.39.0
20 | - azureml-train-core==1.39.0
21 | - mlflow==1.24.0
22 | - transformers==4.17.0
23 | - 'inference-schema[numpy-support]==1.3.0'
24 | - applicationinsights==0.11.10
25 |
--------------------------------------------------------------------------------
/nlp/python-sdk-v2/data-science/environments/training/Dockerfile:
--------------------------------------------------------------------------------
1 | # check release notes https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
2 | FROM nvcr.io/nvidia/pytorch:22.04-py3
3 |
4 | # Install dependencies missing in this container
5 | # NOTE: container already has matplotlib==3.5.1 tqdm==4.62.0
6 | COPY requirements.txt ./
7 | RUN pip install -r requirements.txt
8 |
--------------------------------------------------------------------------------
/nlp/python-sdk-v2/data-science/environments/training/requirements.txt:
--------------------------------------------------------------------------------
1 | # data science requirements
2 | # torchvision==0.12.0
3 | # torch==1.11.0
4 | pytorch_lightning==1.6.4
5 | transformers==4.18.0
6 | datasets==2.3.2
7 | rouge_score==0.0.4
8 | sentencepiece==0.1.96
9 |
10 | # for metrics reporting/plotting
11 | mlflow==2.3.1
12 | azureml-mlflow==1.41.0
13 | # matplotlib==3.5.2
14 | # tqdm==4.64.0
15 | psutil==5.9.0
16 |
17 | # for unit testing
18 | pytest==7.1.2
19 |
20 | # for azure ml SDK v2
21 | azure-ai-ml==1.1.0
--------------------------------------------------------------------------------
/nlp/python-sdk-v2/data-science/src/summarization/compare.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import logging
4 | import mlflow
5 | import json
6 | from distutils.util import strtobool
7 |
8 | def main():
9 | """Main function of the script."""
10 | # initialize root logger
11 | logger = logging.getLogger()
12 | logger.setLevel(logging.INFO)
13 | console_handler = logging.StreamHandler()
14 | formatter = logging.Formatter(
15 | "%(asctime)s : %(levelname)s : %(name)s : %(message)s"
16 | )
17 | console_handler.setFormatter(formatter)
18 | logger.addHandler(console_handler)
19 |
20 | # input and output arguments
21 | parser = argparse.ArgumentParser()
22 | parser.add_argument(
23 | "--baseline_metrics",
24 | type=str,
25 | required=True,
26 | help="path to baseline metrics folder containing all_results.json",
27 | )
28 | parser.add_argument(
29 | "--candidate_metrics",
30 | type=str,
31 | required=True,
32 | help="path to candidate metrics folder containing all_results.json",
33 | )
34 | parser.add_argument(
35 | "--reference_metric",
36 | type=str,
37 | default="predict_rougeLsum",
38 | help="name of reference metric for shipping flag (default: predict_rougeLsum)",
39 | )
40 | parser.add_argument(
41 | "--force_comparison", type=strtobool, default=False, help="set to True to bypass comparison and set --deploy_flag to True"
42 | )
43 | parser.add_argument(
44 | "--deploy_flag", type=str, help="a deploy flag whether to deploy or not"
45 | )
46 |
47 | args = parser.parse_args()
48 |
49 | # Start Logging
50 | mlflow.start_run()
51 |
52 | logger.info(f"Running with arguments: {args}")
53 |
54 | # open metrics on both sides
55 | with open(os.path.join(args.baseline_metrics, "all_results.json")) as in_file:
56 | baseline_metrics = json.loads(in_file.read())
57 | with open(os.path.join(args.candidate_metrics, "all_results.json")) as in_file:
58 | candidate_metrics = json.loads(in_file.read())
59 |
60 | # should we ship or not?
61 | if args.force_comparison:
62 | deploy_flag = True
63 | else:
64 | deploy_flag = (
65 | candidate_metrics[args.reference_metric]
66 | > baseline_metrics[args.reference_metric]
67 | )
68 |
69 | logger.info("baseline_metrics[{}]={}, candidate_metrics[{}]={}, deploy_flag={} (force_comparison={})".format(
70 | args.reference_metric,
71 | baseline_metrics[args.reference_metric],
72 | args.reference_metric,
73 | candidate_metrics[args.reference_metric],
74 | deploy_flag,
75 | args.force_comparison
76 | ))
77 |
78 | # save deploy_flag as a file
79 | os.makedirs(args.deploy_flag, exist_ok=True)
80 | with open(os.path.join(args.deploy_flag, "deploy_flag"), "w") as out_file:
81 | out_file.write("%d" % int(deploy_flag))
82 |
83 | # Stop Logging
84 | mlflow.end_run()
85 |
86 |
87 | if __name__ == "__main__":
88 | main()
89 |
--------------------------------------------------------------------------------
/nlp/python-sdk-v2/data-science/src/summarization/register.py:
--------------------------------------------------------------------------------
1 | from azureml.core import Run
2 | from azureml.core.model import Model
3 |
4 | import os
5 | import argparse
6 | import logging
7 | import mlflow
8 |
9 |
10 | def main():
11 | """Main function of the script."""
12 | # initialize root logger
13 | logger = logging.getLogger()
14 | logger.setLevel(logging.INFO)
15 | console_handler = logging.StreamHandler()
16 | formatter = logging.Formatter(
17 | "%(asctime)s : %(levelname)s : %(name)s : %(message)s"
18 | )
19 | console_handler.setFormatter(formatter)
20 | logger.addHandler(console_handler)
21 |
22 | # input and output arguments
23 | parser = argparse.ArgumentParser()
24 | parser.add_argument(
25 | "--model_folder",
26 | type=str,
27 | required=True,
28 | help="folder containing model",
29 | )
30 | parser.add_argument(
31 | "--register_as",
32 | type=str,
33 | required=True,
34 | help="name to use for model registration in AzureML",
35 | )
36 | parser.add_argument(
37 | "--deploy_flag", type=str, required=True, help="a deploy flag whether to deploy or not"
38 | )
39 |
40 | args = parser.parse_args()
41 | logger.info(f"Running with arguments: {args}")
42 |
43 | # Start Logging
44 | mlflow.start_run()
45 |
46 | if os.path.isfile(args.deploy_flag):
47 | deploy_flag_file_path = args.deploy_flag
48 | else:
49 | deploy_flag_file_path = os.path.join(args.deploy_flag, "deploy_flag")
50 |
51 | logger.info(f"Opening deploy_flag file from {deploy_flag_file_path}")
52 | with open(deploy_flag_file_path, 'rb') as in_file:
53 | deploy_flag = bool(int(in_file.read()))
54 |
55 | if deploy_flag:
56 | logger.info(f"Deploy flag is True, registering model as {args.register_as}...")
57 | run = Run.get_context()
58 |
59 | # if we're running locally, raise
60 | if run.__class__.__name__ == "_OfflineRun":
61 | raise Exception("You can't run this script locally; run it as an AzureML job.")
62 |
63 | _ = Model.register(
64 | run.experiment.workspace,
65 | model_name=args.register_as,
66 | model_path=args.model_folder,
67 | tags={
68 | "type": "huggingface",
69 | "task": "summarization"
70 | },
71 | description="Huggingface model finetuned for summarization",
72 | )
73 | else:
74 | logger.info(f"Deploy flag is False, pass.")
75 |
76 | # Stop Logging
77 | mlflow.end_run()
78 |
79 |
80 | if __name__ == "__main__":
81 | main()
82 |
83 |
--------------------------------------------------------------------------------
/nlp/python-sdk-v2/data-science/src/summarization/score.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import json
4 | from transformers import (
5 | AutoModelForSeq2SeqLM,
6 | AutoTokenizer,
7 | )
8 |
9 | def init():
10 | """
11 | This function is called when the container is initialized/started, typically after create/update of the deployment.
12 | You can write the logic here to perform init operations like caching the model in memory
13 | """
14 | global model, tokenizer
15 | # AZUREML_MODEL_DIR is an environment variable created during deployment.
16 | # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
17 | model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), os.listdir(os.getenv("AZUREML_MODEL_DIR"))[0])
18 | print("model_path")
19 | print(os.listdir(model_path))
20 | model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
21 | tokenizer = AutoTokenizer.from_pretrained(model_path)
22 | print("Init complete")
23 |
24 |
25 | def run(raw_data):
26 | """
27 | This function is called for every invocation of the endpoint to perform the actual scoring/prediction.
28 | In this example we extract the article from the JSON input, call the summarization model's
29 | generate() method, and return the decoded summary.
30 | """
31 | global model, tokenizer
32 | logging.info("Request received")
33 | article = json.loads(raw_data)["data"]
34 | if "t5" in model.config.architectures[0].lower():
35 | article= "summarize:" + article
36 |
37 | inputs = tokenizer(article, return_tensors="pt", max_length=512, truncation=True)
38 | outputs = model.generate(
39 | inputs["input_ids"], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True
40 | )
41 | result = tokenizer.decode(outputs[0], skip_special_tokens=True)
42 | print(result)
43 | logging.info("Request processed")
44 | return result
45 |
--------------------------------------------------------------------------------
/nlp/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 |
4 | name: deploy-model-training-pipeline
5 |
6 | variables:
7 | - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
8 | # 'main' branch: PRD environment
9 | - template: ../../config-infra-prod.yml
10 | - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
11 | # 'develop' or feature branches: DEV environment
12 | - template: ../../config-infra-dev.yml
13 | - name: version
14 | value: python-sdk-v2
15 |
16 | trigger: none
17 |
18 | pool:
19 | vmImage: ubuntu-20.04
20 |
21 | resources:
22 | repositories:
23 | - repository: mlops-templates # Template Repo
24 | name: mlops-templates
25 | type: git
26 | ref: main
27 |
28 | stages:
29 | - stage: DeployTrainingPipeline
30 | displayName: Deploy Training Pipeline
31 | jobs:
32 | - job: DeployTrainingPipeline
33 | steps:
34 | - checkout: self
35 | path: s/
36 | - task: Bash@3
37 | displayName: "Create checkout repository folder(s)"
38 | inputs:
39 | targetType: "inline"
40 | script: |
41 | set -e
42 | mkdir "$(Build.Repository.Name)"
43 | mkdir "mlops-templates"
44 | - checkout: mlops-templates
45 | path: s/templates/
46 | - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
47 | - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
48 | - template: templates/python-sdk-v2/install-requirements.yml@mlops-templates
49 | - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
50 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
51 | parameters:
52 | cluster_name: cpu-cluster
53 | size: STANDARD_DS3_V2
54 | min_instances: 0
55 | max_instances: 1
56 | cluster_tier: dedicated
57 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
58 | parameters:
59 | cluster_name: cpu-cluster-lg
60 | size: Standard_D14_v2
61 | min_instances: 0
62 | max_instances: 1
63 | cluster_tier: dedicated
64 | - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
65 | parameters:
66 | cluster_name: gpu-cluster
67 | size: Standard_NV6
68 | min_instances: 0
69 | max_instances: 1
70 | cluster_tier: dedicated
71 | - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
72 | parameters:
73 | build_type: docker
74 | environment_name: nlp_summarization_train
75 | environment_path: data-science/environments/training
76 | - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
77 | parameters:
78 | pipeline_path: mlops/azureml/train/pipeline-train.py
79 | experiment_name: $(environment)_nlp_summarization_$(Build.SourceBranchName)
80 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | black==22.3.0
2 | flake8==4.0.1
3 | isort==5.10.1
4 | pre-commit==2.19.0
5 |
--------------------------------------------------------------------------------