├── .azuredevops ├── ado-ci-pipeline-ms-hosted.yml ├── ado-ci-pipeline-self-hosted.yml └── pull_request_template.md ├── .dockerignore ├── .env.example ├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ └── ci.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── ci-tests.sh ├── notebooks ├── .devcontainer │ ├── Dockerfile │ ├── devcontainer.json │ └── requirements.txt └── sample_notebook.py ├── pyproject.toml ├── requirements-dev.txt └── src ├── .amlignore ├── __init__.py ├── common ├── __init__.py └── requirements.txt ├── sample_cpu_project ├── .devcontainer │ ├── Dockerfile │ ├── devcontainer.json │ └── requirements.txt ├── sample_main.py └── tests │ ├── .gitkeep │ └── test_dummy.py └── sample_pytorch_gpu_project ├── .amlignore ├── .devcontainer ├── Dockerfile ├── devcontainer.json └── requirements.txt ├── .gitignore ├── README.md ├── aml_example ├── aml_components │ ├── inference-component.yaml │ └── train-component.yaml ├── aml_setup │ ├── create-cpu-compute.yaml │ ├── create-env.yaml │ └── create-gpu-compute.yaml └── sample-aml-components-pipeline.yml ├── inference.py ├── sample_main.py ├── tests ├── .gitkeep └── test_dummy.py └── train.py
/.azuredevops/ado-ci-pipeline-ms-hosted.yml:
--------------------------------------------------------------------------------
1 | # Azure DevOps pipeline for CI (Microsoft-hosted version)
2 | # As the Microsoft-hosted agent option has a limit of 10GB of storage for disk outputs from a pipeline,
3 | # this causes an issue when the Docker images for modules under src require more than 10GB of storage.
4 | # If you run into space issues (or other limitations with a Microsoft hosted agent option outlined in
5 | # https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/hosted?view=azure-devops&tabs=yaml#capabilities-and-limitations),
6 | # consider using the .azuredevops/ado-ci-pipeline-self-hosted.yml version or using scale set agents, see
7 | # this link for more info: https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/scale-set-agents?view=azure-devops
8 | # Note that docker images will only be built for src directories that contain at least one test file, so the
9 | # total space consumed by Docker builds will be dependent on which modules under src contain tests.
10 | # For setting up the pipeline in ADO see:
11 | # https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/pools-queues?view=azure-devops&tabs=yaml%2Cbrowser
12 |
13 |
14 | trigger:
15 | - main
16 |
17 | pool:
18 | vmImage: 'ubuntu-latest'
19 |
20 | steps:
21 | - task: UsePythonVersion@0
22 | displayName: "Use Python 3.11"
23 | inputs:
24 | versionSpec: 3.11
25 |
26 | - script: |
27 | python -m venv venv
28 | source venv/bin/activate
29 | python -m pip install --upgrade pip
30 | pip install -r requirements-dev.txt
31 | pip install pytest-azurepipelines
32 | displayName: "Install requirements"
33 |
34 | # files under venv will be automatically excluded from ruff check by default https://docs.astral.sh/ruff/settings/#exclude
35 | - bash: |
36 | source venv/bin/activate
37 | ruff check --output-format azure
38 | displayName: "Run ruff linter"
39 |
40 | - task: Bash@3
41 | inputs:
42 | targetType: 'filePath'
43 | filePath: ci-tests.sh
44 | env:
45 | BUILD_ARTIFACTSTAGINGDIRECTORY: $(Build.ArtifactStagingDirectory)
46 | displayName: "Run pytest in docker containers"
47 |
48 | - task: PublishTestResults@2
49 | inputs:
50 | testResultsFiles: '**/test-results-*.xml'
51 | searchFolder: $(Build.ArtifactStagingDirectory)
52 | condition: succeededOrFailed()
53 |
54 | # Publish code coverage results
55 | - task: PublishCodeCoverageResults@1
56 | inputs:
57 | codeCoverageTool: 'Cobertura' # Available options: 'JaCoCo', 'Cobertura'
58 | summaryFileLocation: '$(Build.ArtifactStagingDirectory)/coverage.xml'
59 | pathToSources: src/
60 | #reportDirectory: # Optional
61 | #additionalCodeCoverageFiles: # Optional
62 | failIfCoverageEmpty: false # Optional
63 |
--------------------------------------------------------------------------------
/.azuredevops/ado-ci-pipeline-self-hosted.yml:
--------------------------------------------------------------------------------
1 | # Azure DevOps pipeline for CI (self-hosted version)
2 | # As the Microsoft-hosted agent option has a limit of 10GB of storage for disk outputs from a pipeline,
3 | # this causes an issue when the Docker images for modules under src require more than 10GB of storage.
4 | # The self-hosted agent option allows the storage to be increased based on the VM size. This version
5 | # includes extra clean-up and space management steps relating to docker builds, but is otherwise equivalent
6 | # to the .azuredevops/ado-ci-pipeline-ms-hosted.yml version.
7 | # For setting up a CI pipeline with a self-hosted Linux agent see:
8 | # https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/v2-linux?view=azure-devops
9 | # Note that the CI script that this pipeline runs (ci-tests.sh) is designed to be run on a Linux agent,
10 | # but could be adapted to other OSs.
11 | 12 | 13 | trigger: 14 | - main 15 | 16 | pool: 17 | name: Default 18 | demands: 19 | - agent.name -equals mc-ubuntu-agent 20 | workspace: 21 | clean: all 22 | 23 | steps: 24 | - script: | 25 | docker image prune -f 26 | docker container prune -f 27 | displayName: "Docker Cleanup" 28 | 29 | - script: | 30 | df -h 31 | displayName: "Check agent VM space" 32 | 33 | - task: UsePythonVersion@0 34 | displayName: "Use Python 3.11" 35 | inputs: 36 | versionSpec: 3.11 37 | 38 | - script: | 39 | python -m venv venv 40 | source venv/bin/activate 41 | python -m pip install --upgrade pip 42 | pip install -r requirements-dev.txt 43 | pip install pytest-azurepipelines 44 | displayName: "Install requirements" 45 | 46 | - task: UseDotNet@2 47 | inputs: 48 | packageType: 'sdk' 49 | workingDirectory: "src/" 50 | version: '6.x' 51 | 52 | # files under venv will be automatically excluded from ruff check by default https://docs.astral.sh/ruff/settings/#exclude 53 | - bash: | 54 | source venv/bin/activate 55 | ruff check --output-format azure 56 | displayName: "Run ruff linter" 57 | 58 | - task: Bash@3 59 | inputs: 60 | targetType: 'filePath' 61 | filePath: ci-tests.sh 62 | displayName: "Run pytest in docker containers" 63 | 64 | - task: PublishTestResults@2 65 | inputs: 66 | testResultsFiles: "/tmp/artifact_output/**/test-results-*.xml" 67 | condition: succeededOrFailed() 68 | 69 | # Publish code coverage results 70 | - task: PublishCodeCoverageResults@1 71 | inputs: 72 | codeCoverageTool: 'Cobertura' # Available options: 'JaCoCo', 'Cobertura' 73 | summaryFileLocation: '/tmp/artifact_output/coverage.xml' 74 | pathToSources: src/ 75 | #reportDirectory: # Optional 76 | #additionalCodeCoverageFiles: # Optional 77 | failIfCoverageEmpty: false # Optional 78 | 79 | - bash: | 80 | sudo rm -rfv /home/azureuser/myagent/_work/* /home/azureuser/myagent/_work/.* || true 81 | displayName: "Clean-up _work dir" 82 | condition: always() 83 | -------------------------------------------------------------------------------- /.azuredevops/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Purpose 2 | 3 | 4 | ## Does this introduce a breaking change? 5 | 6 | 7 | * [ ] Yes 8 | * [ ] No 9 | 10 | ## Author pre-publish checklist 11 | 12 | 13 | * [ ] No PII in logs or output 14 | * [ ] Made corresponding changes to the documentation 15 | * [ ] All new packages used are included in requirements.txt 16 | * [ ] Functions use type hints, and there are no type hint errors 17 | 18 | ## Pull Request Type 19 | 20 | What kind of change does this Pull Request introduce? 
21 | 22 | 23 | * [ ] Bugfix 24 | * [ ] Feature 25 | * [ ] Code style update (formatting, local variables) 26 | * [ ] Refactoring (no functional changes, no api changes) 27 | * [ ] Documentation content changes 28 | * [ ] Experiment notebook 29 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/outputs 2 | **/data 3 | **/logs 4 | **/tests 5 | **/__pycache__ 6 | **/.mypy_cache 7 | **/.pytest_cache 8 | **/.vscode 9 | **/junit 10 | **/.azuredevops 11 | **/.github 12 | **/.venv 13 | **/venv 14 | **/*.md 15 | **/train_artifacts 16 | **/mlruns 17 | **/.env 18 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | PYTHONPATH=/workspaces/dstoolkit-devcontainers/src 2 | 3 | # Set these variables if using the AML CLI v2 example under src/sample_pytorch_gpu_project 4 | # GROUP="azureml-examples" 5 | # LOCATION="eastus" 6 | # WORKSPACE="main" 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" 9 | directories: 10 | - "**/*" 11 | schedule: 12 | interval: "monthly" 13 | groups: 14 | pip-minor-patch-updates: 15 | applies-to: version-updates 16 | update-types: 17 | - "minor" 18 | - "patch" 19 | - package-ecosystem: "github-actions" 20 | # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) 21 | directory: "/" 22 | schedule: 23 | interval: "monthly" 24 | - package-ecosystem: "devcontainers" 25 | directories: 26 | - "**/*" 27 | schedule: 28 | interval: "monthly" 29 | - package-ecosystem: "docker" 30 | directories: 31 | - "**/*" 32 | schedule: 33 | interval: "monthly" 34 | ignore: 35 | - dependency-name: "*" 36 | update-types: ["version-update:semver-major", "version-update:semver-minor"] 37 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Purpose 2 | 3 | 4 | ## Does this introduce a breaking change? 5 | 6 | 7 | * [ ] Yes 8 | * [ ] No 9 | 10 | ## Author pre-publish checklist 11 | 12 | 13 | * [ ] No PII in logs or output 14 | * [ ] Made corresponding changes to the documentation 15 | * [ ] All new packages used are included in requirements.txt 16 | * [ ] Functions use type hints, and there are no type hint errors 17 | 18 | ## Pull Request Type 19 | 20 | What kind of change does this Pull Request introduce? 
21 |
22 |
23 | * [ ] Bugfix
24 | * [ ] Feature
25 | * [ ] Code style update (formatting, local variables)
26 | * [ ] Refactoring (no functional changes, no api changes)
27 | * [ ] Documentation content changes
28 | * [ ] Experiment notebook
29 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | # Github Actions Workflow for CI
2 |
3 | name: CI
4 |
5 | on:
6 | push:
7 | branches:
8 | - main
9 | pull_request:
10 | branches:
11 | - main
12 | permissions:
13 | contents: read
14 | actions: read
15 | checks: write
16 | jobs:
17 | build:
18 | runs-on: ubuntu-latest
19 |
20 | steps:
21 | - name: Checkout code
22 | uses: actions/checkout@v4
23 |
24 | - name: Setup Python 3.11
25 | uses: actions/setup-python@v5
26 | with:
27 | python-version: 3.11
28 |
29 | - name: Install requirements
30 | run: |
31 | python -m venv venv
32 | source venv/bin/activate
33 | python -m pip install --upgrade pip
34 | pip install -r requirements-dev.txt
35 |
36 | - name: Run ruff linter
37 | # files under venv will be automatically excluded from ruff check by default https://docs.astral.sh/ruff/settings/#exclude
38 | run: |
39 | source venv/bin/activate
40 | ruff check --output-format github
41 |
42 | - name: Run pytest in docker containers
43 | run: ./ci-tests.sh
44 | env:
45 | BUILD_ARTIFACTSTAGINGDIRECTORY: ${{ github.workspace }}
46 |
47 | - name: Publish Test Results
48 | uses: dorny/test-reporter@v2
49 | if: always()
50 | with:
51 | name: pytest
52 | path: |
53 | **/test-results-*.xml
54 | reporter: java-junit
55 |
56 | - name: Publish Code Coverage Summary Report
57 | uses: irongut/CodeCoverageSummary@v1.3.0
58 | with:
59 | badge: true
60 | output: both
61 | format: markdown
62 | filename: coverage.xml
63 |
64 | - name: Add code coverage summary markdown to github step summary
65 | run: cat code-coverage-results.md >> $GITHUB_STEP_SUMMARY
66 |
67 | - name: Archive test and code coverage results
68 | uses: actions/upload-artifact@v4
69 | with:
70 | name: test-and-coverage-results
71 | path: |
72 | **/test-results-*.xml
73 | coverage.xml
74 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | logs
3 | .vscode
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | test-output.xml 58 | junit/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/#use-with-ide 116 | .pdm.toml 117 | 118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 119 | __pypackages__/ 120 | 121 | # Celery stuff 122 | celerybeat-schedule 123 | celerybeat.pid 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # Environments 129 | .env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
166 | #.idea/ 167 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: ruff sort imports 7 | name: ruff sort imports 8 | entry: ruff check --select I --fix 9 | require_serial: true 10 | language: system 11 | types_or: [python, pyi, jupyter] 12 | - id: ruff format 13 | name: ruff format 14 | entry: ruff format 15 | language: system 16 | require_serial: true 17 | types_or: [python, pyi, jupyter] 18 | - id: ruff lint 19 | name: ruff lint 20 | entry: ruff check --force-exclude 21 | language: system 22 | types_or: [python, pyi, jupyter] 23 | require_serial: true 24 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dev Containers for ML feasibility study with VS Code 2 | 3 | A machine learning and data science project template that makes it easy to work with multiple Docker based [VSCode Dev Containers](https://code.visualstudio.com/docs/devcontainers/containers) in the same repository. 
The template also makes it easy to transition projects to the cloud and production by including automated code quality checks, pytest configuration, CI pipeline templates and a sample for running on Azure Machine Learning.
4 |
5 | ## Contents
6 |
7 | - [Dev Containers for ML feasibility study with VS Code](#dev-containers-for-ml-feasibility-study-with-vs-code)
8 | - [Contents](#contents)
9 | - [Introduction and Overview](#introduction-and-overview)
10 | - [Features](#features)
11 | - [Getting Started](#getting-started)
12 | - [How to setup dev environment?](#how-to-setup-dev-environment)
13 | - [How to create a new directory under src with a new environment](#how-to-create-a-new-directory-under-src-with-a-new-environment)
14 | - [Directory Structure](#directory-structure)
15 | - [`notebooks` directory vs `src` directory](#notebooks-directory-vs-src-directory)
16 | - [AML Example](#aml-example)
17 | - [CI Pipeline](#ci-pipeline)
18 | - [Running all unit tests with `ci-tests.sh`](#running-all-unit-tests-with-ci-testssh)
19 | - [How to Configure Azure DevOps CI Pipeline](#how-to-configure-azure-devops-ci-pipeline)
20 | - [Choosing between Azure DevOps Microsoft-hosted vs Self-hosted CI Pipeline](#choosing-between-azure-devops-microsoft-hosted-vs-self-hosted-ci-pipeline)
21 | - [How to Configure Github Actions CI Pipeline](#how-to-configure-github-actions-ci-pipeline)
22 | - [Using SSH Keys in Dev Containers](#using-ssh-keys-in-dev-containers)
23 | - [Future Roadmap](#future-roadmap)
24 | - [Contributing](#contributing)
25 | - [Trademarks](#trademarks)
26 |
27 | ## Introduction and Overview
28 |
29 | This repository provides a [VSCode Dev Container](https://code.visualstudio.com/docs/devcontainers/containers) based project template that can help accelerate your Machine Learning inner-loop development phase. The template covers the phases from early ML experimentation (local training/testing) through to production oriented ML model training (cloud based training/testing with bigger CPUs and GPUs).
30 |
31 | During the early phase of a Machine Learning project, you may face challenges such as each data scientist creating various different python environments, spanning CPU and GPU, that each tend to have different setup procedures. With the power of Dev Containers, you can automate the environment setup process across the team, and every data scientist will get the exact same environment automatically. This template provides both CPU and GPU Dev Container setups as examples. To support experimenting with multiple different ML approaches, each with its own python environment, in one project, this solution allows multiple different Dev Containers to be used in one repository while having a "common" module that will be installed into all Dev Containers to enable code reuse across them.
32 |
33 | Another challenge you may face is each data scientist creating a low quality codebase. That is fine during the experimentation stage, as it keeps team agility high and maximizes your team’s experimentation throughput. But when you move to the model productionization stage, you experience the burden of bringing the code quality up to production level. With the power of the python tools and VSCode extensions configured for this template on top of Dev Containers, you can keep the code quality high automatically without losing your team’s agility and experimentation throughput, and ease the transition to the productionization phase.
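If you want to run the same quality checks by hand (outside of the editor hooks or CI), a minimal sketch looks like the following; it assumes the tools pinned in `requirements-dev.txt` (such as ruff and pre-commit) are installed in your active environment:

```bash
# lint the project and apply any safe automatic fixes
ruff check --fix
# apply the ruff formatter
ruff format
# run every hook configured in .pre-commit-config.yaml against the whole repository
pre-commit run --all-files
```

The invocations above are illustrative; the checked-in `pyproject.toml` and `.pre-commit-config.yaml` remain the source of truth for what the hooks and CI pipelines actually run.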
34 |
35 | ### Features
36 |
37 | - Multiple Dev Container samples (both CPU and GPU) with many common config steps already configured, as follows:
38 | - Automated code quality checks (linter and auto formatter), with automated fixes where possible, with ruff in VSCode on save
39 | - Automated code quality checks (linter and auto formatter) with ruff as a precommit hook
40 | - Zero effort transition from local env to Azure Machine Learning (cloud based env) by leveraging the same Dockerfile
41 | - Pre-configured VSCode extensions installed such as python, jupyter, shellcheck, code-spell-checker, git tools etc
42 | - [Github Actions and Azure DevOps CI pipelines](#ci-pipeline) that run the linter (ruff) and pytest with test result reporting and coverage reporting
43 | - Pull Request templates that help you write a good PR description for both Github and Azure DevOps
44 |
45 | This template automates as much of the tedious setup process as possible, saving time and reducing setup errors for the entire data science team.
46 |
47 | ## Getting Started
48 |
49 | This section provides a comprehensive guide on how to set up a development environment using Dev Containers in Visual Studio Code with step-by-step instructions.
50 |
51 | ### How to setup dev environment?
52 |
53 | 1. Install [Visual Studio Code](https://code.visualstudio.com/)
54 | 1. If your team has a commercial license for Docker Desktop, follow [VS Code Remote Containers | Docker Desktop](https://code.visualstudio.com/docs/remote/containers#_installation). Otherwise, go to [VS Code Remote Containers | Rancher Desktop Docs](https://docs.rancherdesktop.io/how-to-guides/vs-code-remote-containers/) and finish the first step (Install and launch Rancher Desktop. Select dockerd (moby) as the Container Runtime from the Kubernetes Settings menu.)
55 | 1. Install the [VSCode Remote - Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) in VS Code
56 | 1. Copy `.env.example` and rename it to `.env`. **If you forget this step, you will get an error when you try to build the container, so make sure you have `.env` at the root of this directory before you move on to the next step.** This is where you store your credentials etc. `.env` is automatically loaded into the dev container as environment variables. When you add new environment variables to `.env`, update `.env.example` as well to share that with others, but don't include any credentials there. `.env` is gitignored so your credentials in that file won't be accidentally committed.
57 | 1. Run `Dev Containers: Open Folder in Container...` from the Command Palette (F1) and select the `notebooks` directory.
58 | 1. VS Code will then build and start up a container, connect this window to Dev Container: `notebooks`, and install the VS Code extensions specified in `notebooks/.devcontainer/devcontainer.json`. `pre-commit install --overwrite` runs as part of `postCreateCommand` in `devcontainer.json` and this will set up your git pre-commit hooks automatically.
59 | 1. Now setup is done. If you want to develop in another directory, for example under `src`, run `Dev Containers: Open Folder in Container...` and select that directory (it must contain a `.devcontainer`), and that will set up a dev environment for it.
60 | 1. When you or others update either `requirements.txt` or `Dockerfile` in your working directory, make sure to rebuild your container to apply those changes to the container. Run `Dev Containers: Rebuild and Reopen in Container...` for that.
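If you prefer working from a terminal, the same containers can also be built and entered with the Dev Containers CLI. This is only a sketch and assumes Node.js/npm and the `@devcontainers/cli` package are available on your machine; it is not required for the VS Code workflow above:

```bash
# install the reference Dev Containers CLI (assumes a working Node.js/npm setup)
npm install -g @devcontainers/cli

# build and start the container defined in notebooks/.devcontainer
devcontainer up --workspace-folder notebooks

# run a command inside the running container, e.g. the sample interactive script
devcontainer exec --workspace-folder notebooks python sample_notebook.py
```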
61 |
62 | ## How to create a new directory under src with a new environment
63 |
64 | 1. Copy `src/sample_cpu_project/` under `src` and rename it. If you need a GPU environment, base it off `src/sample_pytorch_gpu_project` instead
65 | 1. Update `COPY sample_cpu_project/.devcontainer/requirements.txt` in `Dockerfile` with the new path
66 | 1. Update other parts of `Dockerfile` if needed
67 | 1. Update `requirements.txt` if needed
68 | 1. Run `Dev Containers: Open Folder in Container...` from the Command Palette (F1), select the new directory and make sure you can successfully open the new directory in VS Code running in a container
69 |
70 | ## Directory Structure
71 |
72 | This section gives you an overview of the directory structure of this template. Only essential files are covered in this structure graph for simplicity. The directory structure is as follows:
73 |
74 | ```bash
75 | .
76 | ├── .azuredevops # CI pipelines for Azure DevOps. Details at section: How to Configure Azure DevOps CI Pipeline
77 | ├── .github # CI pipelines for Github Actions. Details at section: How to Configure Github Actions CI Pipeline
78 | ├── .pre-commit-config.yaml # pre-commit config file with formatting and linting. Setup is covered in Section: Getting Started
79 | ├── .env.example # Example of .env file. Setup is covered in Section: Getting Started
80 | ├── ci-tests.sh # Details at Section: Running all unit tests with ci-tests.sh
81 | ├── data # Directory to keep your data for local training etc. This directory is gitignored
82 | ├── notebooks # Setup process is covered in Section: How to setup dev environment?
83 | │ ├── .devcontainer # dev container related configuration files go here following VSCode convention
84 | │ │ ├── devcontainer.json # dev container configuration and VS Code settings, extensions etc.
85 | │ │ ├── Dockerfile # referred to in devcontainer.json
86 | │ │ └── requirements.txt # includes python package list for notebooks. used in Dockerfile
87 | │ └── sample_notebook.py # example of an interactive python script
88 | ├── pyproject.toml # Settings file for ruff, pytest and pytest-cov
89 | └── src
90 | ├── common # this module is accessible from all modules under src. put functions you want to import across the projects here
91 | │ └── requirements.txt # python package list for the common module. installed in all Dockerfiles under src. python tools for src go here too
92 | ├── sample_cpu_project # cpu project example. Setup process is covered in Section: How to setup dev environment?
93 | │ ├── .devcontainer # dev container related configuration files go here following VSCode convention
94 | │ │ ├── devcontainer.json # dev container configuration and VS Code settings, extensions etc.
95 | │ │ ├── Dockerfile # referred to in devcontainer.json. Supports only CPU
96 | │ │ └── requirements.txt # includes python package list for sample_cpu_project. used in Dockerfile
97 | │ ├── sample_main.py
98 | │ └── tests # pytest scripts for sample_cpu_project go here
99 | │ └── test_dummy.py # pytest script example
100 | └── sample_pytorch_gpu_project # gpu project example with pytorch. Setup process is covered in Section: How to setup dev environment?
101 | ├── README.md # README for AML example contained in sample_pytorch_gpu_project
102 | ├── .devcontainer # dev container related configuration files go here following VSCode convention
103 | │ ├── devcontainer.json # dev container configuration and VS Code settings, extensions etc.
104 | │ ├── Dockerfile # referred to in devcontainer.json.
Supports GPU
105 | │ └── requirements.txt # includes python package list for sample_pytorch_gpu_project. used in Dockerfile
106 | ├── aml_example/ # Sample AML CLI v2 Components-based pipeline, including setup YAML. See sample_pytorch_gpu_project/README for full details of files in this directory.
107 | ├── sample_main.py
108 | ├── inference.py # Example pytorch inference/eval script that also works with aml_example
109 | ├── train.py # Example pytorch model training script that also works with aml_example
110 | └── tests # pytest scripts for sample_pytorch_gpu_project go here
111 | └── test_dummy.py # pytest script example
112 | ```
113 |
114 | ### `notebooks` directory vs `src` directory
115 |
116 | There are two places to put python scripts/modules in this template. The `notebooks` directory is for experimental or throw-away python scripts and jupyter notebooks that you want to run cell by cell interactively. For example, EDA, one-off visualization code, and new model approaches you are not yet certain you want to maintain over time typically go in this directory. The `src` directory is for python scripts and modules that you want to reuse and maintain over time. The `src` directory is also where you would put unit tests (typically under a `src/your_module/tests` directory).
117 |
118 | Given the nature of each directory's responsibility, a different level of quality governance is also required for each. One big difference is that pre-commit hooks and CI pipelines run `ruff check` (linter) over `src` but not over `notebooks` (`ruff format` still runs for both). For scripts in `notebooks`, we recommend you use [interactive python scripts](https://code.visualstudio.com/docs/python/jupyter-support-py#_convert-jupyter-notebooks-to-python-code-file) where you can have jupyter-like code cells within `.py` files rather than jupyter notebooks `.ipynb`. Interactive python files give you the following benefits:
119 |
120 | - Full benefits of the python extension in VSCode such as code completion, linting, auto formatting, debugging etc
121 | - pre-commit hooks and CI pipelines will work as they run over `.py` files (but not perfectly over `.ipynb` files)
122 | - The Python file format is easier to review during a pull request review
123 |
124 | Interactive python scripts and jupyter notebooks are interchangeable as described in [Convert Jupyter notebooks to Python code file](https://code.visualstudio.com/docs/python/jupyter-support-py#_convert-jupyter-notebooks-to-python-code-file) so you can switch between them easily if you want to use both formats during development.
125 |
126 | ## AML Example
127 |
128 | An Azure Machine Learning (AML) example is provided under `src/sample_pytorch_gpu_project`. The example is an AML Components-based ML pipeline that runs a pytorch based training step followed by an inference/evaluation step. This example shows the seamless transition from a local run (inside the Dev Container) of pytorch based training/inference to running in the cloud in the exact same Docker environment with flexible compute options. See the [AML Components-based Pipeline Example README](src/sample_pytorch_gpu_project/README.md) for a detailed explanation of and instructions for the example code.
129 |
130 | ## CI Pipeline
131 |
132 | This repository contains templates for running a Continuous Integration (CI) pipeline on either Azure DevOps (under the `.azuredevops` directory) or on Github Actions (under the `.github` directory).
Each of the CI pipeline configurations provided has the following features at a high level:
133 |
134 | - Run code quality checks (`ruff check`) over the repository
135 | - Find all subdirectories under `src` and run all pytest tests inside the associated Docker containers
136 | - Publish test results and code coverage statistics
137 |
138 | We recommend setting up pipeline triggers for PR creation, editing and merging. This will ensure the pipeline runs continuously and will help catch any issues earlier in your development process.
139 |
140 | See the sections below for links on how to set up pipelines with [Azure DevOps](#how-to-configure-azure-devops-ci-pipeline) and [Github Actions](#how-to-configure-github-actions-ci-pipeline). Note that if you are only using one of these platforms to host a pipeline (or neither), you can safely delete either (or both) the `.azuredevops` directory or the `.github` directory.
141 |
142 | ### Running all unit tests with `ci-tests.sh`
143 |
144 | As multiple independent directories can be added under `src`, each with its own Dockerfile and requirements, running unit tests for each directory under `src` needs to be done within the Docker container of each `src` subdirectory. The `ci-tests.sh` script automates this task of running all unit tests for the repository with the following steps:
145 |
146 | 1. Finds all subdirectories under `src` that have at least one `test_*.py` under a `tests` folder
147 | 2. Builds a Docker image for each subdirectory with tests, using the Dockerfile in the associated `.devcontainer` directory
148 | 3. Runs pytest for each subdirectory with tests, inside the matching Docker container built in step 2
149 | 4. Combines all test results and coverage reports from step 3, with reports in a valid format for publishing in either an Azure DevOps or Github Actions hosted pipeline
150 |
151 | Note that the `ci-tests.sh` script can be run locally as well, and it is assumed that all tests are written with pytest.
152 |
153 | ### How to Configure Azure DevOps CI Pipeline
154 |
155 | See [create your first pipeline](https://learn.microsoft.com/en-us/azure/devops/pipelines/create-first-pipeline?view=azure-devops) for how to set up a pipeline in Azure DevOps. Note that to use the provided template in this repository, you will need to specify the path to `.azuredevops/ado-ci-pipeline-ms-hosted.yml` during the pipeline setup process in Azure DevOps.
156 |
157 | #### Choosing between Azure DevOps Microsoft-hosted vs Self-hosted CI Pipeline
158 |
159 | There are two templates for running a CI pipeline in Azure DevOps: a pipeline configuration that uses a Microsoft hosted agent to run the pipeline (`.azuredevops/ado-ci-pipeline-ms-hosted.yml`) and a pipeline configuration that uses a self-hosted agent to run the pipeline (`.azuredevops/ado-ci-pipeline-self-hosted.yml`).
160 |
161 | The Microsoft hosted version is easiest to start with and is recommended. You may consider switching to the self-hosted version when you have added several directories under `src` that have individual containers and the size of all the docker builds in the CI pipeline comes up against the 10GB disk storage limit for Microsoft hosted pipelines (see [resource limitations of Microsoft hosted agents](https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/hosted?view=azure-devops&tabs=yaml#capabilities-and-limitations)).
In this case (or when other resource constraints are hit) switching to a self-hosted agent pipeline may be an option, and the template at `.azuredevops/ado-ci-pipeline-self-hosted.yml` includes additional steps to help manage the space consumed by CI pipeline runs. The two versions are otherwise identical in terms of building each docker container under `src`, running pytest within each of these containers and publishing test results and coverage information.
162 |
163 | ### How to Configure Github Actions CI Pipeline
164 |
165 | The Github Actions CI pipeline is defined in `.github/workflows/ci.yaml`. As long as this repository is hosted on Github, the pipeline will be automatically triggered when a PR is made or updated, as well as when a PR is merged into your main branch, with the setting below, so **no additional setup is required**.
166 |
167 | ```yaml
168 | on:
169 | push:
170 | branches:
171 | - main
172 | pull_request:
173 | branches:
174 | - main
175 | ```
176 |
177 | ## Using SSH Keys in Dev Containers
178 |
179 | If you have connected to the origin repository using SSH authentication, you will need to do a bit of setup to reuse your local SSH key inside a Dev Container automatically, which will allow you to interact with the origin repository (git push, git pull etc.) inside the Dev Container.
180 |
181 | 1. Try the recommendations in the official docs for [sharing git credentials](https://code.visualstudio.com/remote/advancedcontainers/sharing-git-credentials)
182 | 1. If the previous step doesn't work, try the method below, which includes a bit of additional code to add keys to the SSH agent.
183 |
184 | Add the following to your ~/.bash_profile, ~/.profile, ~/.zprofile or similar (by default most WSL users will have only a ~/.profile) so an ssh-agent will be started when needed and default keys will be added to the agent. The ssh-agent will then automatically forward keys to your Dev Container when it's launched.
185 |
186 | ```sh
187 | # this part taken from https://code.visualstudio.com/remote/advancedcontainers/sharing-git-credentials
188 | # check that link for the latest version or updates
189 | if [ -z "$SSH_AUTH_SOCK" ]; then
190 | # Check for a currently running instance of the agent
191 | RUNNING_AGENT="`ps -ax | grep 'ssh-agent -s' | grep -v grep | wc -l | tr -d '[:space:]'`"
192 | if [ "$RUNNING_AGENT" = "0" ]; then
193 | # Launch a new instance of the agent
194 | ssh-agent -s &> $HOME/.ssh/ssh-agent
195 | fi
196 | eval `cat $HOME/.ssh/ssh-agent`
197 | fi
198 |
199 | # ADD SSH Keys to the SSH agent
200 | # if using non-default SSH key, add it to ssh-add command like:
201 | # ssh-add /path/to/your/ssh-key
202 | ssh-add
203 | ```
204 |
205 | ## Future Roadmap
206 |
207 | - Add Docker build caching to Azure DevOps MS hosted CI pipeline
208 | - Investigate making `src/common` installable with `pip install -e`
209 |
210 | ## Contributing
211 |
212 | This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
213 |
214 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
215 | 216 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 217 | 218 | ## Trademarks 219 | 220 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 221 | 222 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 223 | 224 | Any use of third-party trademarks or logos are subject to those third-party's policies. 225 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. 
Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | ## Microsoft Support Policy 10 | 11 | Support for this project is limited to the resources listed above. 12 | -------------------------------------------------------------------------------- /ci-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | : ' 3 | This script will run all unit tests in the repository (for all directories under src/ that 4 | have at least one test_*.py under a tests folder). It will build a Docker image for each directory with tests, 5 | using the Dockerfile in the .devcontainer directory. It will then run pytest in the Docker container 6 | and save the test results and coverage report to the build artifacts directory. This script can be run 7 | locally or also in an ADO CI pipeline or Github Actions CI pipeline. See the 8 | .azuredevops/ado-ci-pipeline-ms-hosted.yml file for an example use in an ADO CI pipeline and the 9 | .github/workflows/ci.yaml for an example use in Github Actions pipeline. 10 | ' 11 | 12 | set -eE 13 | 14 | repo_root="$(pwd)" 15 | 16 | # Find all the 'src' subdirectories with a 'tests' folder, extract the dir name as test_dir_parent 17 | for test_dir_parent in $(find "${repo_root}/src" -type d -name 'tests' -exec dirname {} \; | sed "s|${repo_root}/src/||"); do 18 | # Check for at least one Python file in the 'tests' subdirectory of test_dir_parent 19 | count_test_py_files=$(find "${repo_root}/src/${test_dir_parent}/tests"/*.py 2>/dev/null | wc -l) 20 | if [ $count_test_py_files != 0 ]; then 21 | # Use the devcontainer Dockerfile to build a Docker image for the module to run tests 22 | docker build "${repo_root}" -f "${repo_root}/src/${test_dir_parent}/.devcontainer/Dockerfile" -t "${test_dir_parent}" 23 | 24 | echo "Running tests for ${test_dir_parent}, found ${count_test_py_files} test files" 25 | 26 | : ' 27 | Run the tests in the built Docker container, saving the test results and coverage report to /tmp/artifact_output. 28 | Some other key parts of the docker run command are explained here: 29 | - The local /tmp dir is mounted to docker /tmp so that there are no permission issues with the docker user and the 30 | pipeline user that runs this script and the user that accesses the test results and coverage report artifacts. 31 | - The --cov-append option tells pytest coverage to append the results to the existing coverage data, instead of 32 | overwriting it, this builds up coverage for each $test_dir_parent in a single coverage report for publishing. 
33 | - Set the .coverage location to be under /tmp so it is writable, coverage.py uses this file to store intermediate
34 | data while measuring code coverage across multiple test runs or when combining data from multiple sources.
35 | - exit with pytest exit code to ensure the script exits with a non-zero exit code if pytest fails, this ensures the CI
36 | pipeline in ADO fails if any tests fail.
37 | '
38 | docker run \
39 | -v "${repo_root}:/workspace" \
40 | -v "/tmp:/tmp" \
41 | --env test_dir_parent="$test_dir_parent" \
42 | --env COVERAGE_FILE=/tmp/artifact_output/.coverage \
43 | "${test_dir_parent}" \
44 | /bin/bash -ec '
45 | mkdir -p /tmp/artifact_output/$test_dir_parent; \
46 | env "PATH=$PATH" \
47 | env "PYTHONPATH=/workspace/src/$test_dir_parent:$PYTHONPATH" \
48 | pytest \
49 | --junitxml=/tmp/artifact_output/$test_dir_parent/test-results-$test_dir_parent.xml \
50 | -o junit_suite_name=$test_dir_parent \
51 | --doctest-modules \
52 | --cov \
53 | --cov-config=/workspace/pyproject.toml \
54 | --cov-report=xml:/tmp/artifact_output/coverage.xml \
55 | --cov-append \
56 | /workspace/src/$test_dir_parent; \
57 | exit $?'
58 | fi
59 | done
60 |
61 | : '
62 | If running CI on ADO with MS-hosted agents, copy the test and coverage results to the build artifacts directory
63 | so that it is preserved for publishing. See the .azuredevops/ado-ci-pipeline-ms-hosted.yml file for how the
64 | BUILD_ARTIFACTSTAGINGDIRECTORY is set.
65 | '
66 | if [ -n "$BUILD_ARTIFACTSTAGINGDIRECTORY" ]; then
67 | cp -r /tmp/artifact_output/* "${BUILD_ARTIFACTSTAGINGDIRECTORY}"
68 | fi
69 |
--------------------------------------------------------------------------------
/notebooks/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11.12
2 | # create non-root user and set the default user
3 | ARG USERNAME=devuser
4 | ARG USER_UID=1000
5 | ARG USER_GID=$USER_UID
6 | RUN groupadd --gid $USER_GID $USERNAME \
7 | && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
8 | # Add sudo support
9 | && apt-get update \
10 | && apt-get install -y sudo \
11 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
12 | && chmod 0440 /etc/sudoers.d/$USERNAME \
13 | && rm -rf /var/lib/apt/lists/*
14 | USER $USERNAME
15 |
16 | # make all python tools installed by pip accessible
17 | ENV PATH=$PATH:/home/$USERNAME/.local/bin
18 | RUN pip install --no-cache-dir pip --upgrade
19 | COPY requirements-dev.txt .
20 | RUN pip install --no-cache-dir -r requirements-dev.txt
21 |
22 | # install notebooks related dependencies
23 | COPY notebooks/.devcontainer/requirements.txt .
24 | RUN pip install --no-cache-dir -r requirements.txt 25 | -------------------------------------------------------------------------------- /notebooks/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | // use root directory as build context so that requirements-dev.txt is accessible during build 4 | "context": "../../", 5 | "dockerfile": "Dockerfile" 6 | }, 7 | "shutdownAction": "none", 8 | "features": { 9 | "ghcr.io/devcontainers/features/common-utils:2": { 10 | "installZsh": true, 11 | "configureZshAsDefaultShell": true, 12 | "installOhMyZsh": true, 13 | "upgradePackages": false, 14 | "username": "devuser", 15 | }, 16 | }, 17 | "runArgs": [ 18 | "--env-file", 19 | "../.env" 20 | ], 21 | "postCreateCommand": "pre-commit install --overwrite", 22 | "customizations": { 23 | "vscode": { 24 | "extensions": [ 25 | "codezombiech.gitignore", 26 | "DavidAnson.vscode-markdownlint", 27 | "donjayamanne.githistory", 28 | "donjayamanne.python-environment-manager", 29 | "donjayamanne.vscode-default-python-kernel", 30 | "eamodio.gitlens", 31 | "GitHub.copilot", 32 | "github.copilot-chat", 33 | "Gruntfuggly.todo-tree", 34 | "ionutvmi.path-autocomplete", 35 | "marchiore.csvtomarkdown", 36 | "mechatroner.rainbow-csv", 37 | "ms-azure-devops.azure-pipelines", 38 | "ms-python.mypy-type-checker", 39 | "ms-python.python", 40 | "ms-toolsai.jupyter", 41 | "ms-vsliveshare.vsliveshare", 42 | "njpwerner.autodocstring", 43 | "redhat.vscode-yaml", 44 | "streetsidesoftware.code-spell-checker", 45 | "timonwong.shellcheck", 46 | "charliermarsh.ruff", 47 | "grapecity.gc-excelviewer" 48 | ], 49 | "settings": { 50 | "autoDocstring.docstringFormat": "google", 51 | "mypy-type-checker.importStrategy": "fromEnvironment", 52 | "python.testing.pytestEnabled": true, 53 | "python.defaultInterpreterPath": "/usr/local/bin/python", 54 | "[python]": { 55 | "editor.codeActionsOnSave": { 56 | "source.fixAll": "explicit", 57 | "source.organizeImports": "explicit" 58 | }, 59 | "editor.defaultFormatter": "charliermarsh.ruff", 60 | "editor.formatOnSave": true, 61 | "files.trimTrailingWhitespace": true 62 | }, 63 | "notebook.formatOnSave.enabled": true, 64 | "notebook.codeActionsOnSave": { 65 | "notebook.source.fixAll": "explicit", 66 | "notebook.source.organizeImports": "explicit" 67 | } 68 | }, 69 | } 70 | }, 71 | } 72 | -------------------------------------------------------------------------------- /notebooks/.devcontainer/requirements.txt: -------------------------------------------------------------------------------- 1 | # notebooks specific requirements 2 | ipykernel==6.29.5 3 | nbconvert==7.16.6 4 | nbformat==5.10.4 5 | -------------------------------------------------------------------------------- /notebooks/sample_notebook.py: -------------------------------------------------------------------------------- 1 | # %% 2 | print("a") 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | target-version = "py311" 3 | extend-exclude = ["notebooks"] 4 | 5 | [tool.ruff.lint] 6 | # On top of the default `select` (`E`, `F`), 7 | # enable 8 | # - flake8-bugbear (`B`) 9 | # - flake8-bandit (S) 10 | # - isort (I) 11 | # - pep8-naming (N) 12 | select = ["E", "F", "B", "S", "I", "N"] 13 | 14 | 15 | [tool.ruff.lint.per-file-ignores] 16 | "**/tests/**/test_*.py" = [ 17 | "S101", # asserts allowed in tests 18 | ] 19 
| "**/*.ipynb" = [ 20 | "B018", # allow notebooks printing out variables in the mid cell with variable names only 21 | ] 22 | 23 | [tool.pytest.ini_options] 24 | pythonpath = "src" 25 | 26 | [tool.coverage.run] 27 | omit = [ 28 | # ignore all notebooks in src 29 | "*/notebooks/*", 30 | # ignore all tests in src 31 | "*/tests/*", 32 | ] 33 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | mypy==1.15.0 2 | pytest==8.3.5 3 | pre-commit==4.2.0 4 | pytest-cov==6.1.1 5 | ruff==0.11.8 6 | -------------------------------------------------------------------------------- /src/.amlignore: -------------------------------------------------------------------------------- 1 | **/outputs 2 | **/data 3 | **/logs 4 | **/tests 5 | **/__pycache__ 6 | **/.mypy_cache 7 | **/.pytest_cache 8 | **/.vscode 9 | **/junit 10 | **/.azuredevops 11 | **/.venv 12 | **/venv 13 | **/*.md 14 | **/train_artifacts 15 | **/mlruns -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/__init__.py -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/common/__init__.py -------------------------------------------------------------------------------- /src/common/requirements.txt: -------------------------------------------------------------------------------- 1 | # libraries for common modules 2 | ipykernel==6.29.5 3 | -------------------------------------------------------------------------------- /src/sample_cpu_project/.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.12 2 | # create non-root user and set the default user 3 | ARG USERNAME=devuser 4 | ARG USER_UID=1000 5 | ARG USER_GID=$USER_UID 6 | RUN groupadd --gid $USER_GID $USERNAME \ 7 | && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ 8 | # Add sudo support 9 | && apt-get update \ 10 | && apt-get install -y sudo \ 11 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ 12 | && chmod 0440 /etc/sudoers.d/$USERNAME \ 13 | && rm -rf /var/lib/apt/lists/* 14 | USER $USERNAME 15 | 16 | # make all python tools installed by pip accesible 17 | ENV PATH=$PATH:/home/devuser/.local/bin 18 | 19 | RUN pip install --no-cache-dir pip --upgrade 20 | 21 | COPY src/sample_cpu_project/.devcontainer/requirements.txt . 22 | RUN pip install --no-cache-dir -r requirements.txt 23 | 24 | # install common module related pacakages 25 | COPY src/common/requirements.txt . 26 | RUN pip install --no-cache-dir -r requirements.txt 27 | 28 | # install python tools 29 | COPY requirements-dev.txt . 
30 | RUN pip install --no-cache-dir -r requirements-dev.txt 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/sample_cpu_project/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | // use root directory as build context so that requirements-dev.txt is accessible during build 4 | "context": "../../../", 5 | "dockerfile": "Dockerfile" 6 | }, 7 | "shutdownAction": "none", 8 | "features": { 9 | "ghcr.io/devcontainers/features/common-utils:2": { 10 | "installZsh": true, 11 | "configureZshAsDefaultShell": true, 12 | "installOhMyZsh": true, 13 | "upgradePackages": false, 14 | "username": "devuser", 15 | }, 16 | }, 17 | "runArgs": [ 18 | "--env-file", 19 | "../../.env" 20 | ], 21 | "postCreateCommand": "pre-commit install --overwrite", 22 | "customizations": { 23 | "vscode": { 24 | "extensions": [ 25 | "codezombiech.gitignore", 26 | "DavidAnson.vscode-markdownlint", 27 | "donjayamanne.githistory", 28 | "donjayamanne.python-environment-manager", 29 | "donjayamanne.vscode-default-python-kernel", 30 | "eamodio.gitlens", 31 | "GitHub.copilot", 32 | "github.copilot-chat", 33 | "Gruntfuggly.todo-tree", 34 | "ionutvmi.path-autocomplete", 35 | "marchiore.csvtomarkdown", 36 | "mechatroner.rainbow-csv", 37 | "ms-azure-devops.azure-pipelines", 38 | "ms-python.mypy-type-checker", 39 | "ms-python.python", 40 | "ms-toolsai.jupyter", 41 | "ms-vsliveshare.vsliveshare", 42 | "njpwerner.autodocstring", 43 | "redhat.vscode-yaml", 44 | "streetsidesoftware.code-spell-checker", 45 | "timonwong.shellcheck", 46 | "charliermarsh.ruff", 47 | "grapecity.gc-excelviewer" 48 | ], 49 | "settings": { 50 | "autoDocstring.docstringFormat": "google", 51 | "mypy-type-checker.importStrategy": "fromEnvironment", 52 | "python.testing.pytestEnabled": true, 53 | "python.defaultInterpreterPath": "/usr/local/bin/python", 54 | "[python]": { 55 | "editor.codeActionsOnSave": { 56 | "source.fixAll": "explicit", 57 | "source.organizeImports.ruff": "explicit" 58 | }, 59 | "editor.defaultFormatter": "charliermarsh.ruff", 60 | "editor.formatOnSave": true, 61 | "files.trimTrailingWhitespace": true 62 | }, 63 | "notebook.formatOnSave.enabled": true, 64 | "notebook.codeActionsOnSave": { 65 | "notebook.source.fixAll": "explicit", 66 | "notebook.source.organizeImports": "explicit" 67 | }, 68 | }, 69 | } 70 | }, 71 | } 72 | -------------------------------------------------------------------------------- /src/sample_cpu_project/.devcontainer/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/sample_cpu_project/.devcontainer/requirements.txt -------------------------------------------------------------------------------- /src/sample_cpu_project/sample_main.py: -------------------------------------------------------------------------------- 1 | def main(x: int, y: int): 2 | return x + y 3 | -------------------------------------------------------------------------------- /src/sample_cpu_project/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/sample_cpu_project/tests/.gitkeep -------------------------------------------------------------------------------- /src/sample_cpu_project/tests/test_dummy.py: 
-------------------------------------------------------------------------------- 1 | from sample_cpu_project import sample_main 2 | 3 | 4 | def test_main(): 5 | assert sample_main.main(1, 2) == 3 6 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.amlignore: -------------------------------------------------------------------------------- 1 | **/outputs 2 | **/data 3 | **/logs 4 | **/tests 5 | **/__pycache__ 6 | **/.mypy_cache 7 | **/.pytest_cache 8 | **/.vscode 9 | **/junit 10 | **/.azuredevops 11 | **/.venv 12 | **/venv 13 | **/*.md 14 | **/train_artifacts -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.12 2 | # install Azure CLI 3 | RUN wget -qO- https://aka.ms/InstallAzureCLIDeb | bash 4 | # create non-root user and set the default user 5 | ARG USERNAME=devuser 6 | ARG USER_UID=1000 7 | ARG USER_GID=$USER_UID 8 | RUN groupadd --gid $USER_GID $USERNAME \ 9 | && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ 10 | # Add sudo support 11 | && apt-get update \ 12 | && apt-get install -y sudo \ 13 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ 14 | && chmod 0440 /etc/sudoers.d/$USERNAME \ 15 | && rm -rf /var/lib/apt/lists/* 16 | USER $USERNAME 17 | 18 | 19 | # make all python tools installed by pip accessible 20 | ENV PATH=$PATH:/home/$USERNAME/.local/bin 21 | 22 | RUN pip install pip --upgrade 23 | 24 | COPY src/sample_pytorch_gpu_project/.devcontainer/requirements.txt . 25 | RUN pip install --no-cache-dir -r requirements.txt 26 | 27 | # install common module related packages 28 | COPY src/common/requirements.txt . 29 | RUN pip install --no-cache-dir -r requirements.txt 30 | 31 | # install python tools 32 | COPY requirements-dev.txt .
33 | RUN pip install --no-cache-dir -r requirements-dev.txt 34 | 35 | RUN az extension add --name ml 36 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | // use root directory as build context so that requirements-dev.txt is accessible during build 4 | "context": "../../../", 5 | "dockerfile": "Dockerfile" 6 | }, 7 | "shutdownAction": "none", 8 | "features": { 9 | "ghcr.io/devcontainers/features/common-utils:2": { 10 | "installZsh": true, 11 | "configureZshAsDefaultShell": true, 12 | "installOhMyZsh": true, 13 | "upgradePackages": false, 14 | "username": "devuser", 15 | }, 16 | }, 17 | "runArgs": [ 18 | "--gpus", 19 | "all", 20 | "--env-file", 21 | "../../.env" 22 | ], 23 | "postCreateCommand": "pre-commit install --overwrite", 24 | "customizations": { 25 | "vscode": { 26 | "extensions": [ 27 | "codezombiech.gitignore", 28 | "DavidAnson.vscode-markdownlint", 29 | "donjayamanne.githistory", 30 | "donjayamanne.python-environment-manager", 31 | "donjayamanne.vscode-default-python-kernel", 32 | "eamodio.gitlens", 33 | "GitHub.copilot", 34 | "github.copilot-chat", 35 | "Gruntfuggly.todo-tree", 36 | "ionutvmi.path-autocomplete", 37 | "marchiore.csvtomarkdown", 38 | "mechatroner.rainbow-csv", 39 | "ms-azure-devops.azure-pipelines", 40 | "ms-python.mypy-type-checker", 41 | "ms-python.python", 42 | "ms-toolsai.jupyter", 43 | "ms-vsliveshare.vsliveshare", 44 | "njpwerner.autodocstring", 45 | "redhat.vscode-yaml", 46 | "streetsidesoftware.code-spell-checker", 47 | "timonwong.shellcheck", 48 | "charliermarsh.ruff", 49 | "grapecity.gc-excelviewer" 50 | ], 51 | "settings": { 52 | "autoDocstring.docstringFormat": "google", 53 | "mypy-type-checker.importStrategy": "fromEnvironment", 54 | "python.testing.pytestEnabled": true, 55 | "python.defaultInterpreterPath": "/usr/local/bin/python", 56 | "[python]": { 57 | "editor.codeActionsOnSave": { 58 | "source.fixAll": "explicit", 59 | "source.organizeImports": "explicit" 60 | }, 61 | "editor.defaultFormatter": "charliermarsh.ruff", 62 | "editor.formatOnSave": true, 63 | "files.trimTrailingWhitespace": true 64 | }, 65 | "notebook.formatOnSave.enabled": true, 66 | "notebook.codeActionsOnSave": { 67 | "notebook.source.fixAll": "explicit", 68 | "notebook.source.organizeImports": "explicit" 69 | } 70 | }, 71 | } 72 | }, 73 | } 74 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.devcontainer/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.7.0 2 | torchvision==0.22.0 3 | # the below are used by AML, can be deleted if not using AML 4 | mlflow==2.21.3 5 | azureml-mlflow==1.60.0 6 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.gitignore: -------------------------------------------------------------------------------- 1 | mlruns 2 | outputs -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/README.md: -------------------------------------------------------------------------------- 1 | # AML Components-based Pipeline Example 2 | 3 | ## Introduction 4 | 5 | This subdirectory contains a configured and tested lightweight Azure Machine Learning (AML) CLI v2 components-based ML pipeline example.
Read more about [AML components-based pipelines](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-component-pipelines-cli?view=azureml-api-2). This example allows you to seamlessly move from working in the local Dev Container environment to a cloud-based environment with the exact same Dockerfile. 6 | 7 | Two example files are provided, `train.py` and `inference.py`, which contain a pytorch example (taken from [this pytorch tutorial](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html?highlight=cifar10)) for training a small conv-net on CIFAR10 and performing inference and evaluation with the trained model. These files are also wrapped in AML components (`aml_example/aml_components/train-component.yaml` and `aml_example/aml_components/inference-component.yaml`), which are then composed into an AML components-based pipeline in `aml_example/sample-aml-components-pipeline.yml`. The example can thus be run locally inside the Dev Container or in the cloud in AML, with the exact same environment. See the sections below for [how to set up](#setting-up-aml-for-running-a-pipeline) and [run the example in AML](#run-the-aml-component-example). 8 | 9 | As an example workflow, you could work with the sample `train.py` and `inference.py` with your local CPU/GPU to get things working and then easily transition to running the same scripts in an AML cloud environment that could have a more powerful GPU. 10 | 11 | ## Setting up AML for running a pipeline 12 | 13 | The sections below go through the setup required for running the AML pipeline-components example. 14 | 15 | ### 1. Dev Container Setup 16 | 17 | Ensure you have run through the [project setup steps outlined](../../README.md#getting-started) in the top-level README. When going through these steps you could also add the AML environment variables to the `.env` file to avoid referring to them in each CLI command. The `.env` file contains commented-out names of the required variables. 18 | 19 | ### 2. Azure prerequisites 20 | 21 | Before you try out the AML example, you will need to set up an Azure account with the following: 22 | 23 | - If you don't have an Azure subscription, create a free account before you begin. Try the [free or paid version of Azure Machine Learning](https://azure.microsoft.com/free/). 24 | 25 | - An Azure Machine Learning workspace. [Create workspace resources](https://learn.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources?view=azureml-api-2). 26 | 27 | ### 3. Using the AML CLI v2 28 | 29 | The Dev Container environment comes configured with the Azure CLI and the AML CLI v2 extension. See [how to configure the AML CLI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-configure-cli?view=azureml-api-2&tabs=public) for background information. 30 | 31 | With your Dev Container launched for `src/sample_pytorch_gpu_project`, verify that the AML CLI v2 extension is installed with: 32 | 33 | ```bash 34 | az ml 35 | ``` 36 | 37 | If this runs, then log in to your Azure account with: 38 | 39 | ```bash 40 | az login 41 | ``` 42 | 43 | Alternatively, you may need to specify the specific tenant that contains the subscription and workspace you will be running AML jobs in: 44 | 45 | ```bash 46 | az login --use-device-code --tenant <tenant-id> 47 | ``` 48 | 49 | Note that to avoid manually specifying `-g <resource-group> -w <workspace-name>` in the `az` commands below, you can place these values in your `.env` file in the root of the repository (not tracked by git). You will need to relaunch the Dev Container after adding these.
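Alternatively, the workspace details can be registered as Azure CLI defaults so they do not have to be repeated on every command. The snippet below is a minimal sketch (not part of this repository's setup scripts); the placeholder values are assumptions to replace with your own:

```bash
# optional: set Azure CLI defaults so -g/-w can be omitted from later az ml commands
az account set --subscription <subscription-id>
az configure --defaults group=<resource-group> workspace=<workspace-name>
```

With these defaults in place, the `-g`/`-w` flags on the `az ml` commands in the following sections can usually be dropped.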
50 | 51 | ### 4. Set up AML Compute and Docker Environment 52 | 53 | After logging into the AML CLI, you will need to set up an AML compute cluster and an AML custom environment to run the train.py and inference.py scripts in the AML components pipeline example (`aml_example/sample-aml-components-pipeline.yml`). 54 | 55 | ### 4a. Set up the AML compute cluster 56 | 57 | There are two options provided to set up compute clusters, a GPU cluster (using `aml_example/aml_setup/create-gpu-compute.yaml`) and a CPU cluster (using `aml_example/aml_setup/create-cpu-compute.yaml`). To run the example we will just create the GPU cluster, but in the future you may create both GPU and CPU clusters and then use a mix of compute across different types of scripts (e.g. GPU for training and CPU for an evaluation script). 58 | 59 | 1. First update the `location` and `size` parameters in `aml_example/aml_setup/create-gpu-compute.yaml` to match the requirements for your subscription and AML workspace: 60 | 61 | ```yaml 62 | size: Standard_NC6 63 | location: centralus 64 | ``` 65 | 66 | 2. Create the compute cluster from the command line inside the Dev Container: 67 | 68 | ```bash 69 | az ml compute create -f aml_example/aml_setup/create-gpu-compute.yaml -g <resource-group> -w <workspace-name> 70 | ``` 71 | 72 | ### 4b. Set up the AML custom environment 73 | 74 | We will use the exact same Dockerfile that specifies the Dev Container and local running environment for running jobs in AML so that there is a seamless transition to the cloud. 75 | 76 | Create the custom AML environment from the command line inside the Dev Container: 77 | 78 | ```bash 79 | az ml environment create --file aml_example/aml_setup/create-env.yaml -g <resource-group> -w <workspace-name> 80 | ``` 81 | 82 | #### **Updating the AML Custom Environment** 83 | 84 | Note that the AML environment will need to be updated manually any time new dependencies are added to `.devcontainer/requirements.txt` or `.devcontainer/Dockerfile` is updated. Also, if you add new dependencies in `src/common/requirements.txt` that are needed in `src/sample_pytorch_gpu_project`, then this will also require an environment rebuild. The environment can be rebuilt by running the exact same command used above to create the environment. 85 | 86 | ## Run the AML Component Example 87 | 88 | After going through the [setup steps](#setting-up-aml-for-running-a-pipeline), you can run the AML components pipeline example `aml_example/sample-aml-components-pipeline.yml`, which will run `train.py` and `inference.py` in sequence, with the trained model passed between the steps by the pipeline.
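The command in the next step starts the pipeline with the input defaults defined in the YAML. As an optional sketch (not part of the original setup steps), individual inputs such as the training batch size can also be overridden at submission time with `--set`; the property path below is an assumption based on the `jobs.train_component.inputs.batch_size` field in `sample-aml-components-pipeline.yml`:

```bash
# optional sketch: override the batch_size pipeline input at submission time
az ml job create -f aml_example/sample-aml-components-pipeline.yml --web \
  --set jobs.train_component.inputs.batch_size=8 \
  -g <resource-group> -w <workspace-name>
```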
89 | 90 | Start the pipeline experiment from the command line inside the Dev Container: 91 | 92 | ```bash 93 | az ml job create -f aml_example/sample-aml-components-pipeline.yml --web -g <resource-group> -w <workspace-name> 94 | ``` 95 | 96 | ## Explanation of AML Files 97 | 98 | ```bash 99 | src/sample_pytorch_gpu_project/ 100 | ├── README.md 101 | ├── aml_example # Contains all AML related files 102 | │ ├── aml_components # AML component files that are used in sample-aml-components-pipeline.yml 103 | │ │ ├── inference-component.yaml # AML CLI v2 inference component that wraps inference.py 104 | │ │ └── train-component.yaml # AML CLI v2 training component that wraps train.py 105 | │ ├── aml_setup # AML workspace setup files 106 | │ │ ├── create-cpu-compute.yaml # Create AML CPU cluster 107 | │ │ ├── create-env.yaml # Create AML custom Docker environment 108 | │ │ └── create-gpu-compute.yaml # Create AML GPU cluster 109 | │ └── sample-aml-components-pipeline.yml # Sample AML CLI v2 components pipeline that refers to aml_components/inference-component.yaml and aml_components/train-component.yaml 110 | ├── inference.py # Example of pytorch model inference (from a trained model from train.py) 111 | ├── sample_main.py # Sample function used by unit tests 112 | ├── tests 113 | │ └── test_dummy.py # Sample pytest that calls function from sample_main.py 114 | └── train.py # Example of pytorch model training, can be run locally or in AML job 115 | 116 | ``` 117 | 118 | ## How to delete all AML dependencies and source files 119 | 120 | If you don't need to use any of the sample AML integrations, follow the steps below to remove all dependencies and related source files. 121 | 122 | 1. In `.devcontainer/Dockerfile`, remove the following lines: 123 | 124 | ```bash 125 | RUN wget -qO- https://aka.ms/InstallAzureCLIDeb | bash 126 | ``` 127 | 128 | and 129 | 130 | ```bash 131 | RUN az extension add --name ml 132 | ``` 133 | 134 | 2. Remove the `mlflow` dependencies in `.devcontainer/requirements.txt`: 135 | 136 | ```txt 137 | mlflow==2.21.3 138 | azureml-mlflow==1.60.0 139 | ``` 140 | 141 | Note that you could keep the `mlflow` dependency if you want to keep `train.py` and `inference.py` for local runs with `mlflow` logging. 142 | 143 | 3. Delete the entire `aml_example` directory. 144 | 145 | ```bash 146 | cd /workspace/src/sample_pytorch_gpu_project 147 | rm -rf aml_example 148 | ``` 149 | 150 | 4. [Optional] Delete `train.py` and `inference.py`, which are included as examples to work with the AML pipeline components. You could also retain these samples as an example of working with pytorch locally. 151 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_components/inference-component.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | type: command 3 | 4 | display_name: pytorch-inference-component 5 | name: pytorch_inference_component 6 | description: Get best model from training run and do inference on the test set.
7 | version: 1 8 | 9 | code: /workspace/src 10 | command: >- 11 | python sample_pytorch_gpu_project/inference.py --train_artifacts_dir ${{inputs.train_artifacts_dir}} --preds_dir ${{outputs.test_set_preds_dir}} 12 | inputs: 13 | train_artifacts_dir: 14 | type: uri_folder 15 | outputs: 16 | test_set_preds_dir: 17 | type: uri_folder 18 | environment: azureml:pytorch-gpu-env@latest # should match name used in aml_setup/create-env.yaml -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_components/train-component.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | type: command 3 | 4 | display_name: pytorch-train-component 5 | name: pytorch_train_component 6 | description: Train a Pytorch model. 7 | version: 1 8 | 9 | code: /workspace/src 10 | command: >- 11 | python sample_pytorch_gpu_project/train.py --train_artifacts_dir ${{outputs.train_artifacts_dir}} --batch_size ${{inputs.batch_size}} 12 | inputs: 13 | batch_size: 14 | type: integer 15 | default: 4 16 | outputs: 17 | train_artifacts_dir: 18 | type: uri_folder 19 | environment: azureml:pytorch-gpu-env@latest # should match name used in aml_setup/create-env.yaml -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_setup/create-cpu-compute.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/amlCompute.schema.json 2 | name: pytorch-cpu-cluster 3 | type: amlcompute 4 | size: Standard_DS3_v2 5 | min_instances: 0 6 | max_instances: 1 7 | idle_time_before_scale_down: 120 8 | location: centralus -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_setup/create-env.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | # https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-environments-v2?tabs=cli 3 | name: pytorch-gpu-env 4 | build: 5 | path: /workspace/src # context at this level to include src/common requirements 6 | dockerfile_path: sample_pytorch_gpu_project/.devcontainer/Dockerfile -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_setup/create-gpu-compute.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/amlCompute.schema.json 2 | name: pytorch-gpu-cluster 3 | type: amlcompute 4 | size: Standard_NC6 5 | min_instances: 0 6 | max_instances: 1 7 | idle_time_before_scale_down: 120 8 | location: centralus -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/sample-aml-components-pipeline.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json 2 | type: pipeline 3 | 4 | display_name: sample_pytorch_gpu_pipeline_run # change this name each run to be descriptive 5 | experiment_name: sample_pytorch_gpu_pipeline 6 | description: Pytorch model training, inference, evaluation. 
7 | 8 | jobs: 9 | train_component: 10 | type: command 11 | component: /workspace/src/sample_pytorch_gpu_project/aml_example/aml_components/train-component.yaml 12 | compute: azureml:pytorch-gpu-cluster 13 | inputs: 14 | batch_size: 4 15 | outputs: 16 | train_artifacts_dir: 17 | type: uri_folder 18 | mode: rw_mount 19 | inference_component: 20 | type: command 21 | component: /workspace/src/sample_pytorch_gpu_project/aml_example/aml_components/inference-component.yaml 22 | compute: azureml:pytorch-gpu-cluster 23 | inputs: 24 | train_artifacts_dir: ${{parent.jobs.train_component.outputs.train_artifacts_dir}} 25 | outputs: 26 | test_set_preds_dir: 27 | type: uri_folder 28 | mode: rw_mount 29 | identity: 30 | # use with a managed identity assigned to compute cluster 31 | # type: managed_identity 32 | # use with user identity that is logged in to AML CLI 33 | type: user_identity -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | import mlflow 5 | import pandas as pd 6 | import torch 7 | import torchvision 8 | import torchvision.transforms as transforms 9 | from train import Net 10 | 11 | 12 | def main(args): 13 | # keep this setup code 14 | print("\n".join(f"{k}: {v}" for k, v in sorted(dict(vars(args)).items()))) 15 | dict_args = vars(args) 16 | mlflow.autolog() 17 | mlflow.log_params(dict_args) 18 | 19 | # code below this comment is a sample only, replace with your own training code 20 | net = Net() 21 | net.load_state_dict(torch.load(args.train_artifacts_dir / "cifar_net.pth")) 22 | 23 | # transforms.Normalize() uses Imagenet means and stds 24 | transform = transforms.Compose( 25 | [ 26 | transforms.ToTensor(), 27 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 28 | ] 29 | ) 30 | testset = torchvision.datasets.CIFAR10( 31 | root="./data", train=False, download=True, transform=transform 32 | ) 33 | testloader = torch.utils.data.DataLoader( 34 | testset, batch_size=2, shuffle=False, num_workers=2 35 | ) 36 | 37 | correct = 0 38 | total = 0 39 | combined_predictions = [] 40 | combined_labels = [] 41 | # since we're not training, we don't need to calculate the gradients for our outputs 42 | with torch.no_grad(): 43 | for data in testloader: 44 | images, labels = data 45 | # calculate outputs by running images through the network 46 | outputs = net(images) 47 | # the class with the highest energy is what we choose as prediction 48 | predicted = torch.argmax(outputs.detach(), 1) 49 | combined_predictions.extend(predicted.tolist()) 50 | combined_labels.extend(labels.tolist()) 51 | total += labels.size(0) 52 | correct += (predicted == labels).sum().item() 53 | 54 | accuracy = correct / total 55 | print( 56 | f"Accuracy of the network on the 10000 test images: {100 * accuracy // 1.0} %" 57 | ) 58 | mlflow.log_metric("test_accuracy", accuracy) 59 | 60 | # save predictions CSV to output directory 61 | df_preds = pd.DataFrame( 62 | {"label": combined_labels, "prediction": combined_predictions} 63 | ) 64 | df_preds.to_csv(args.preds_dir / "preds.csv", index=False) 65 | 66 | 67 | if __name__ == "__main__": 68 | parser = argparse.ArgumentParser() 69 | parser.add_argument( 70 | "--train_artifacts_dir", 71 | type=Path, 72 | help="Directory where trained model is saved", 73 | default=Path("outputs"), 74 | ) 75 | parser.add_argument( 76 | "--preds_dir", 77 | type=Path, 
78 | help="Output folder containing test set predictions CSV file (preds.csv)", 79 | default=Path("outputs"), 80 | ) 81 | args = parser.parse_args() 82 | main(args) 83 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/sample_main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def add(x: int, y: int): 5 | return x + y 6 | 7 | 8 | def main(): 9 | print("torch.cuda.is_available():", torch.cuda.is_available()) 10 | print("torch.cuda.device_count():", torch.cuda.device_count()) 11 | print("torch.backends.mkl.is_available():", torch.backends.mkl.is_available()) 12 | print("torch.backends.cudnn.is_available():", torch.backends.cudnn.is_available()) 13 | print("torch.backends.cuda.is_built():", torch.backends.cuda.is_built()) 14 | print("torch.backends.mkldnn.is_available():", torch.backends.mkldnn.is_available()) 15 | print("torch.version.cuda:", torch.version.cuda) 16 | print("torch.backends.cudnn.version():", torch.backends.cudnn.version()) 17 | 18 | 19 | if __name__ == "__main__": 20 | main() 21 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/sample_pytorch_gpu_project/tests/.gitkeep -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/tests/test_dummy.py: -------------------------------------------------------------------------------- 1 | from sample_pytorch_gpu_project import sample_main 2 | 3 | 4 | def test_main(): 5 | sample_main.main() 6 | 7 | 8 | def test_add(): 9 | assert sample_main.add(1, 2) == 3 10 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | import mlflow 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F # noqa: N812 8 | import torch.optim as optim 9 | import torchvision 10 | import torchvision.transforms as transforms 11 | 12 | 13 | # Example model, delete or replace with your own 14 | class Net(nn.Module): 15 | def __init__(self): 16 | super().__init__() 17 | self.conv1 = nn.Conv2d(3, 6, 5) 18 | self.pool = nn.MaxPool2d(2, 2) 19 | self.conv2 = nn.Conv2d(6, 16, 5) 20 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 21 | self.fc2 = nn.Linear(120, 84) 22 | self.fc3 = nn.Linear(84, 10) 23 | 24 | def forward(self, x): 25 | x = self.pool(F.relu(self.conv1(x))) 26 | x = self.pool(F.relu(self.conv2(x))) 27 | x = torch.flatten(x, 1) # flatten all dimensions except batch 28 | x = F.relu(self.fc1(x)) 29 | x = F.relu(self.fc2(x)) 30 | x = self.fc3(x) 31 | return x 32 | 33 | 34 | def main(args): 35 | # keep this setup code 36 | print("\n".join(f"{k}: {v}" for k, v in sorted(dict(vars(args)).items()))) 37 | dict_args = vars(args) 38 | args.train_artifacts_dir.mkdir(parents=True, exist_ok=True) 39 | mlflow.autolog() 40 | mlflow.log_params(dict_args) 41 | 42 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 43 | print(device) 44 | 45 | # code below this comment is a sample only, replace with your own training code 46 | 47 | # transforms.Normalize() uses Imagenet means and stds 48 
| transform = transforms.Compose( 49 | [ 50 | transforms.ToTensor(), 51 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 52 | ] 53 | ) 54 | 55 | trainset = torchvision.datasets.CIFAR10( 56 | root="./data", train=True, download=True, transform=transform 57 | ) 58 | trainloader = torch.utils.data.DataLoader( 59 | trainset, batch_size=args.batch_size, shuffle=True, num_workers=2 60 | ) 61 | 62 | net = Net() 63 | net.to(device) 64 | criterion = nn.CrossEntropyLoss() 65 | optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) 66 | 67 | for epoch in range(2): # loop over the dataset multiple times 68 | mini_batch_loss = 0.0 69 | for i, data in enumerate(trainloader): 70 | # get the inputs; data is a list of [inputs, labels] 71 | inputs, labels = data[0].to(device), data[1].to(device) 72 | 73 | # zero the parameter gradients 74 | optimizer.zero_grad() 75 | 76 | # forward + backward + optimize 77 | outputs = net(inputs) 78 | loss = criterion(outputs, labels) 79 | loss.backward() 80 | optimizer.step() 81 | 82 | # print statistics 83 | mini_batch_loss += loss.item() 84 | if (i + 1) % 2000 == 0: # print every 2000 mini-batches 85 | print(f"[{epoch + 1}, {i + 1:5d}] loss: {mini_batch_loss / 2000:.3f}") 86 | mlflow.log_metric( 87 | "Training Loss", 88 | mini_batch_loss / 2000, 89 | step=i + (epoch * len(trainloader)), 90 | ) 91 | mini_batch_loss = 0.0 92 | 93 | print("Finished Training") 94 | 95 | # save model 96 | torch.save(net.state_dict(), args.train_artifacts_dir / "cifar_net.pth") 97 | print(f"Model saved to {args.train_artifacts_dir / 'cifar_net.pth'}") 98 | 99 | 100 | if __name__ == "__main__": 101 | parser = argparse.ArgumentParser() 102 | parser.add_argument( 103 | "--train_artifacts_dir", 104 | type=Path, 105 | help="output directory where trained model, checkpoints etc are saved", 106 | default=Path("outputs"), 107 | ) 108 | parser.add_argument( 109 | "--batch_size", type=int, help="the training batch size", default=4 110 | ) 111 | args = parser.parse_args() 112 | main(args) 113 | --------------------------------------------------------------------------------
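As the README in this directory notes, `train.py` and `inference.py` can also be run locally inside the Dev Container without AML. Below is a minimal sketch of a local run using the argparse defaults defined in the two scripts; the working directory follows the `/workspace/src/sample_pytorch_gpu_project` path used elsewhere in the README:

```bash
# local run inside the Dev Container; artifacts go to ./outputs and MLflow logs to ./mlruns
cd /workspace/src/sample_pytorch_gpu_project
python train.py --batch_size 4   # trains on CIFAR10 and saves outputs/cifar_net.pth
python inference.py              # loads outputs/cifar_net.pth and writes outputs/preds.csv
```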