├── .azuredevops ├── ado-ci-pipeline-ms-hosted.yml ├── ado-ci-pipeline-self-hosted.yml └── pull_request_template.md ├── .dockerignore ├── .env.example ├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ └── ci.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── ci-tests.sh ├── notebooks ├── .devcontainer │ ├── Dockerfile │ ├── devcontainer.json │ └── requirements.txt └── sample_notebook.py ├── pyproject.toml ├── requirements-dev.txt └── src ├── .amlignore ├── __init__.py ├── common ├── __init__.py └── requirements.txt ├── sample_cpu_project ├── .devcontainer │ ├── Dockerfile │ ├── devcontainer.json │ └── requirements.txt ├── sample_main.py └── tests │ ├── .gitkeep │ └── test_dummy.py └── sample_pytorch_gpu_project ├── .amlignore ├── .devcontainer ├── Dockerfile ├── devcontainer.json └── requirements.txt ├── .gitignore ├── README.md ├── aml_example ├── aml_components │ ├── inference-component.yaml │ └── train-component.yaml ├── aml_setup │ ├── create-cpu-compute.yaml │ ├── create-env.yaml │ └── create-gpu-compute.yaml └── sample-aml-components-pipeline.yml ├── inference.py ├── sample_main.py ├── tests ├── .gitkeep └── test_dummy.py └── train.py
/.azuredevops/ado-ci-pipeline-ms-hosted.yml:
--------------------------------------------------------------------------------
1 | # Azure DevOps pipeline for CI (Microsoft-hosted version)
2 | # As the Microsoft-hosted agent option has a limit of 10GB of storage for disk outputs from a pipeline,
3 | # this causes an issue when the Docker images for modules under src require more than 10GB of storage.
4 | # If you run into space issues (or other limitations with a Microsoft hosted agent option outlined in
5 | # https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/hosted?view=azure-devops&tabs=yaml#capabilities-and-limitations),
6 | # consider using the .azuredevops/ado-ci-pipeline-self-hosted.yml version or using scale set agents, see
7 | # this link for more info: https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/scale-set-agents?view=azure-devops
8 | # Note that docker images will only be built for src directories that contain at least one test file, so the
9 | # total space consumed by Docker builds will be dependent on which modules under src contain tests.
10 | # For setting up the pipeline in ADO see:
11 | # https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/pools-queues?view=azure-devops&tabs=yaml%2Cbrowser
12 |
13 |
14 | trigger:
15 | - main
16 |
17 | pool:
18 | vmImage: 'ubuntu-latest'
19 |
20 | steps:
21 | - task: UsePythonVersion@0
22 | displayName: "Use Python 3.11"
23 | inputs:
24 | versionSpec: 3.11
25 |
26 | - script: |
27 | python -m venv venv
28 | source venv/bin/activate
29 | python -m pip install --upgrade pip
30 | pip install -r requirements-dev.txt
31 | pip install pytest-azurepipelines
32 | displayName: "Install requirements"
33 |
34 | # files under venv will be automatically excluded from ruff check by default https://docs.astral.sh/ruff/settings/#exclude
35 | - bash: |
36 | source venv/bin/activate
37 | ruff check --output-format azure
38 | displayName: "Run ruff linter"
39 |
40 | - task: Bash@3
41 | inputs:
42 | targetType: 'filePath'
43 | filePath: ci-tests.sh
44 | env:
45 | BUILD_ARTIFACTSTAGINGDIRECTORY: $(Build.ArtifactStagingDirectory)
46 | displayName: "Run pytest in docker containers"
47 |
48 | - task: PublishTestResults@2
49 | inputs:
50 | testResultsFiles: '**/test-results-*.xml'
51 | searchFolder: $(Build.ArtifactStagingDirectory)
52 | condition: succeededOrFailed()
53 |
54 | # Publish code coverage results
55 | - task: PublishCodeCoverageResults@1
56 | inputs:
57 | codeCoverageTool: 'Cobertura' # Available options: 'JaCoCo', 'Cobertura'
58 | summaryFileLocation: '$(Build.ArtifactStagingDirectory)/coverage.xml'
59 | pathToSources: src/
60 | #reportDirectory: # Optional
61 | #additionalCodeCoverageFiles: # Optional
62 | failIfCoverageEmpty: false # Optional
63 |
--------------------------------------------------------------------------------
/.azuredevops/ado-ci-pipeline-self-hosted.yml:
--------------------------------------------------------------------------------
1 | # Azure DevOps pipeline for CI (self-hosted version)
2 | # As the Microsoft-hosted agent option has a limit of 10GB of storage for disk outputs from a pipeline,
3 | # this causes an issue when the Docker images for modules under src require more than 10GB of storage.
4 | # The self-hosted agent option allows the storage to be increased based on the VM size. This version
5 | # includes extra clean-up and space management steps relating to docker builds, but is otherwise equivalent
6 | # to the .azuredevops/ado-ci-pipeline-ms-hosted.yml version.
7 | # For setting up a CI pipeline with a self-hosted Linux agent see:
8 | # https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/v2-linux?view=azure-devops
9 | # Note that the CI script that this pipeline runs (ci-tests.sh) is designed to be run on a Linux agent,
10 | # but could be adapted to other OSs.
11 | 12 | 13 | trigger: 14 | - main 15 | 16 | pool: 17 | name: Default 18 | demands: 19 | - agent.name -equals mc-ubuntu-agent 20 | workspace: 21 | clean: all 22 | 23 | steps: 24 | - script: | 25 | docker image prune -f 26 | docker container prune -f 27 | displayName: "Docker Cleanup" 28 | 29 | - script: | 30 | df -h 31 | displayName: "Check agent VM space" 32 | 33 | - task: UsePythonVersion@0 34 | displayName: "Use Python 3.11" 35 | inputs: 36 | versionSpec: 3.11 37 | 38 | - script: | 39 | python -m venv venv 40 | source venv/bin/activate 41 | python -m pip install --upgrade pip 42 | pip install -r requirements-dev.txt 43 | pip install pytest-azurepipelines 44 | displayName: "Install requirements" 45 | 46 | - task: UseDotNet@2 47 | inputs: 48 | packageType: 'sdk' 49 | workingDirectory: "src/" 50 | version: '6.x' 51 | 52 | # files under venv will be automatically excluded from ruff check by default https://docs.astral.sh/ruff/settings/#exclude 53 | - bash: | 54 | source venv/bin/activate 55 | ruff check --output-format azure 56 | displayName: "Run ruff linter" 57 | 58 | - task: Bash@3 59 | inputs: 60 | targetType: 'filePath' 61 | filePath: ci-tests.sh 62 | displayName: "Run pytest in docker containers" 63 | 64 | - task: PublishTestResults@2 65 | inputs: 66 | testResultsFiles: "/tmp/artifact_output/**/test-results-*.xml" 67 | condition: succeededOrFailed() 68 | 69 | # Publish code coverage results 70 | - task: PublishCodeCoverageResults@1 71 | inputs: 72 | codeCoverageTool: 'Cobertura' # Available options: 'JaCoCo', 'Cobertura' 73 | summaryFileLocation: '/tmp/artifact_output/coverage.xml' 74 | pathToSources: src/ 75 | #reportDirectory: # Optional 76 | #additionalCodeCoverageFiles: # Optional 77 | failIfCoverageEmpty: false # Optional 78 | 79 | - bash: | 80 | sudo rm -rfv /home/azureuser/myagent/_work/* /home/azureuser/myagent/_work/.* || true 81 | displayName: "Clean-up _work dir" 82 | condition: always() 83 | -------------------------------------------------------------------------------- /.azuredevops/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Purpose 2 | 3 | 4 | ## Does this introduce a breaking change? 5 | 6 | 7 | * [ ] Yes 8 | * [ ] No 9 | 10 | ## Author pre-publish checklist 11 | 12 | 13 | * [ ] No PII in logs or output 14 | * [ ] Made corresponding changes to the documentation 15 | * [ ] All new packages used are included in requirements.txt 16 | * [ ] Functions use type hints, and there are no type hint errors 17 | 18 | ## Pull Request Type 19 | 20 | What kind of change does this Pull Request introduce? 
21 | 22 | 23 | * [ ] Bugfix 24 | * [ ] Feature 25 | * [ ] Code style update (formatting, local variables) 26 | * [ ] Refactoring (no functional changes, no api changes) 27 | * [ ] Documentation content changes 28 | * [ ] Experiment notebook 29 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/outputs 2 | **/data 3 | **/logs 4 | **/tests 5 | **/__pycache__ 6 | **/.mypy_cache 7 | **/.pytest_cache 8 | **/.vscode 9 | **/junit 10 | **/.azuredevops 11 | **/.github 12 | **/.venv 13 | **/venv 14 | **/*.md 15 | **/train_artifacts 16 | **/mlruns 17 | **/.env 18 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | PYTHONPATH=/workspaces/dstoolkit-devcontainers/src 2 | 3 | # Set these variables if using the AML CLI v2 example under src/sample_pytorch_gpu_project 4 | # GROUP="azureml-examples" 5 | # LOCATION="eastus" 6 | # WORKSPACE="main" 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" 9 | directories: 10 | - "**/*" 11 | schedule: 12 | interval: "monthly" 13 | groups: 14 | pip-minor-patch-updates: 15 | applies-to: version-updates 16 | update-types: 17 | - "minor" 18 | - "patch" 19 | - package-ecosystem: "github-actions" 20 | # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) 21 | directory: "/" 22 | schedule: 23 | interval: "monthly" 24 | - package-ecosystem: "devcontainers" 25 | directories: 26 | - "**/*" 27 | schedule: 28 | interval: "monthly" 29 | - package-ecosystem: "docker" 30 | directories: 31 | - "**/*" 32 | schedule: 33 | interval: "monthly" 34 | ignore: 35 | - dependency-name: "*" 36 | update-types: ["version-update:semver-major", "version-update:semver-minor"] 37 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Purpose 2 | 3 | 4 | ## Does this introduce a breaking change? 5 | 6 | 7 | * [ ] Yes 8 | * [ ] No 9 | 10 | ## Author pre-publish checklist 11 | 12 | 13 | * [ ] No PII in logs or output 14 | * [ ] Made corresponding changes to the documentation 15 | * [ ] All new packages used are included in requirements.txt 16 | * [ ] Functions use type hints, and there are no type hint errors 17 | 18 | ## Pull Request Type 19 | 20 | What kind of change does this Pull Request introduce? 
21 |
22 |
23 | * [ ] Bugfix
24 | * [ ] Feature
25 | * [ ] Code style update (formatting, local variables)
26 | * [ ] Refactoring (no functional changes, no api changes)
27 | * [ ] Documentation content changes
28 | * [ ] Experiment notebook
29 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | # Github Actions Workflow for CI
2 |
3 | name: CI
4 |
5 | on:
6 | push:
7 | branches:
8 | - main
9 | pull_request:
10 | branches:
11 | - main
12 | permissions:
13 | contents: read
14 | actions: read
15 | checks: write
16 | jobs:
17 | build:
18 | runs-on: ubuntu-latest
19 |
20 | steps:
21 | - name: Checkout code
22 | uses: actions/checkout@v4
23 |
24 | - name: Setup Python 3.11
25 | uses: actions/setup-python@v5
26 | with:
27 | python-version: 3.11
28 |
29 | - name: Install requirements
30 | run: |
31 | python -m venv venv
32 | source venv/bin/activate
33 | python -m pip install --upgrade pip
34 | pip install -r requirements-dev.txt
35 |
36 | - name: Run ruff linter
37 | # files under venv will be automatically excluded from ruff check by default https://docs.astral.sh/ruff/settings/#exclude
38 | run: |
39 | source venv/bin/activate
40 | ruff check --output-format github
41 |
42 | - name: Run pytest in docker containers
43 | run: ./ci-tests.sh
44 | env:
45 | BUILD_ARTIFACTSTAGINGDIRECTORY: ${{ github.workspace }}
46 |
47 | - name: Publish Test Results
48 | uses: dorny/test-reporter@v2
49 | if: always()
50 | with:
51 | name: pytest
52 | path: |
53 | **/test-results-*.xml
54 | reporter: java-junit
55 |
56 | - name: Publish Code Coverage Summary Report
57 | uses: irongut/CodeCoverageSummary@v1.3.0
58 | with:
59 | badge: true
60 | output: both
61 | format: markdown
62 | filename: coverage.xml
63 |
64 | - name: Add code coverage summary markdown to github step summary
65 | run: cat code-coverage-results.md >> $GITHUB_STEP_SUMMARY
66 |
67 | - name: Archive test and code coverage results
68 | uses: actions/upload-artifact@v4
69 | with:
70 | name: test-and-coverage-results
71 | path: |
72 | **/test-results-*.xml
73 | coverage.xml
74 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | logs
3 | .vscode
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | test-output.xml 58 | junit/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/#use-with-ide 116 | .pdm.toml 117 | 118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 119 | __pypackages__/ 120 | 121 | # Celery stuff 122 | celerybeat-schedule 123 | celerybeat.pid 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # Environments 129 | .env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
166 | #.idea/ 167 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: ruff sort imports 7 | name: ruff sort imports 8 | entry: ruff check --select I --fix 9 | require_serial: true 10 | language: system 11 | types_or: [python, pyi, jupyter] 12 | - id: ruff format 13 | name: ruff format 14 | entry: ruff format 15 | language: system 16 | require_serial: true 17 | types_or: [python, pyi, jupyter] 18 | - id: ruff lint 19 | name: ruff lint 20 | entry: ruff check --force-exclude 21 | language: system 22 | types_or: [python, pyi, jupyter] 23 | require_serial: true 24 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dev Containers for ML feasibility study with VS Code 2 | 3 | A machine learning and data science project template that makes it easy to work with multiple Docker based [VSCode Dev Containers](https://code.visualstudio.com/docs/devcontainers/containers) in the same repository. 
The template also makes it easy to transition projects to the cloud and production by including automated code quality checks, pytest configuration, CI pipeline templates and a sample for running on Azure Machine Learning.
4 |
5 | ## Contents
6 |
7 | - [Dev Containers for ML feasibility study with VS Code](#dev-containers-for-ml-feasibility-study-with-vs-code)
8 | - [Contents](#contents)
9 | - [Introduction and Overview](#introduction-and-overview)
10 | - [Features](#features)
11 | - [Getting Started](#getting-started)
12 | - [How to setup dev environment?](#how-to-setup-dev-environment)
13 | - [How to create a new directory under src with a new environment](#how-to-create-a-new-directory-under-src-with-a-new-environment)
14 | - [Directory Structure](#directory-structure)
15 | - [`notebooks` directory vs `src` directory](#notebooks-directory-vs-src-directory)
16 | - [AML Example](#aml-example)
17 | - [CI Pipeline](#ci-pipeline)
18 | - [Running all unit tests with `ci-tests.sh`](#running-all-unit-tests-with-ci-testssh)
19 | - [How to Configure Azure DevOps CI Pipeline](#how-to-configure-azure-devops-ci-pipeline)
20 | - [Choosing between Azure DevOps Microsoft-hosted vs Self-hosted CI Pipeline](#choosing-between-azure-devops-microsoft-hosted-vs-self-hosted-ci-pipeline)
21 | - [How to Configure Github Actions CI Pipeline](#how-to-configure-github-actions-ci-pipeline)
22 | - [Using SSH Keys in Dev Containers](#using-ssh-keys-in-dev-containers)
23 | - [Future Roadmap](#future-roadmap)
24 | - [Contributing](#contributing)
25 | - [Trademarks](#trademarks)
26 |
27 | ## Introduction and Overview
28 |
29 | This repository provides a [VSCode Dev Container](https://code.visualstudio.com/docs/devcontainers/containers) based project template that can help accelerate your Machine Learning inner-loop development phase. The template covers the phases from early ML experimentation (local training/testing) through to production oriented ML model training (cloud based training/testing with bigger CPUs and GPUs).
30 |
31 | During the early phase of a Machine Learning project, you may face challenges such as each data scientist creating various different python environments, spanning CPU and GPU, that each tend to have different setup procedures. With the power of Dev Containers, you can automate the environment setup process across the team, and every data scientist will get the exact same environment automatically. This template provides both CPU and GPU Dev Container setups as examples. To support experimenting with multiple different ML approaches, each with its own python environment, in one project, this solution allows multiple different Dev Containers to be used in one repository while having a "common" module that will be installed into all Dev Containers to enable code reuse across them.
32 |
33 | Another challenge you may face is each data scientist creating a low quality codebase. That is fine during the experimentation stage, as it keeps team agility high and maximizes your team’s experimentation throughput. But when you move to the model productionization stage, you experience the burden of bringing the code quality up to production level. With the power of the python tools and VSCode extensions configured for this template on top of Dev Containers, you can keep the code quality high automatically without losing your team’s agility and experimentation throughput, and ease the transition to the productionization phase.
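If you want to run the same quality checks by hand (outside of the editor hooks or CI), a minimal sketch looks like the following; it assumes the tools pinned in `requirements-dev.txt` (such as ruff and pre-commit) are installed in your active environment:

```bash
# lint the project and apply any safe automatic fixes
ruff check --fix
# apply the ruff formatter
ruff format
# run every hook configured in .pre-commit-config.yaml against the whole repository
pre-commit run --all-files
```

The invocations above are illustrative; the checked-in `pyproject.toml` and `.pre-commit-config.yaml` remain the source of truth for what the hooks and CI pipelines actually run.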
34 |
35 | ### Features
36 |
37 | - Multiple Dev Container samples (both CPU and GPU) with many common config steps already configured, as follows:
38 | - Automated code quality checks (linter and auto formatter), with automated fixes where possible, with ruff in VSCode on save
39 | - Automated code quality checks (linter and auto formatter) with ruff as a precommit hook
40 | - Zero effort transition from local env to Azure Machine Learning (cloud based env) by leveraging the same Dockerfile
41 | - Pre-configured VSCode extensions installed such as python, jupyter, shellcheck, code-spell-checker, git tools etc
42 | - [Github Actions and Azure DevOps CI pipelines](#ci-pipeline) that run the linter (ruff) and pytest with test result reporting and coverage reporting
43 | - Pull Request templates that help you write a good PR description for both Github and Azure DevOps
44 |
45 | This template automates as much of the tedious setup process as possible, saving time and reducing setup errors for the entire data science team.
46 |
47 | ## Getting Started
48 |
49 | This section provides a comprehensive guide on how to set up a development environment using Dev Containers in Visual Studio Code with step-by-step instructions.
50 |
51 | ### How to setup dev environment?
52 |
53 | 1. Install [Visual Studio Code](https://code.visualstudio.com/)
54 | 1. If your team has a commercial license for Docker Desktop, follow [VS Code Remote Containers | Docker Desktop](https://code.visualstudio.com/docs/remote/containers#_installation). Otherwise, go to [VS Code Remote Containers | Rancher Desktop Docs](https://docs.rancherdesktop.io/how-to-guides/vs-code-remote-containers/) and finish the first step (Install and launch Rancher Desktop. Select dockerd (moby) as the Container Runtime from the Kubernetes Settings menu.)
55 | 1. Install the [VSCode Remote - Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) in VS Code
56 | 1. Copy `.env.example` and rename it to `.env`. **If you forget this step, you will get an error when you try to build the container, so make sure you have `.env` at the root of this directory before you move on to the next step.** This is where you store your credentials etc. `.env` is automatically loaded into the dev container as environment variables. When you add new environment variables to `.env`, update `.env.example` as well to share that with others, but don't include any credentials there. `.env` is gitignored so your credentials in that file won't be accidentally committed.
57 | 1. Run `Dev Containers: Open Folder in Container...` from the Command Palette (F1) and select the `notebooks` directory.
58 | 1. VS Code will then build and start up a container, connect this window to Dev Container: `notebooks`, and install the VS Code extensions specified in `notebooks/.devcontainer/devcontainer.json`. `pre-commit install --overwrite` runs as part of `postCreateCommand` in `devcontainer.json` and this will set up your git pre-commit hooks automatically.
59 | 1. Now setup is done. If you want to develop in another directory, for example under `src`, run `Dev Containers: Open Folder in Container...` and select that directory (it must contain a `.devcontainer`), and that will set up a dev environment for it.
60 | 1. When you or others update either `requirements.txt` or `Dockerfile` in your working directory, make sure to rebuild your container to apply those changes to the container. Run `Dev Containers: Rebuild and Reopen in Container...` for that.
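If you prefer working from a terminal, the same containers can also be built and entered with the Dev Containers CLI. This is only a sketch and assumes Node.js/npm and the `@devcontainers/cli` package are available on your machine; it is not required for the VS Code workflow above:

```bash
# install the reference Dev Containers CLI (assumes a working Node.js/npm setup)
npm install -g @devcontainers/cli

# build and start the container defined in notebooks/.devcontainer
devcontainer up --workspace-folder notebooks

# run a command inside the running container, e.g. the sample interactive script
devcontainer exec --workspace-folder notebooks python sample_notebook.py
```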
61 |
62 | ## How to create a new directory under src with a new environment
63 |
64 | 1. Copy `src/sample_cpu_project/` under `src` and rename it. If you need a GPU environment, base it off `src/sample_pytorch_gpu_project` instead
65 | 1. Update `COPY sample_cpu_project/.devcontainer/requirements.txt` in `Dockerfile` with the new path
66 | 1. Update other parts of `Dockerfile` if needed
67 | 1. Update `requirements.txt` if needed
68 | 1. Run `Dev Containers: Open Folder in Container...` from the Command Palette (F1), select the new directory and make sure you can successfully open the new directory in VS Code running in a container
69 |
70 | ## Directory Structure
71 |
72 | This section gives you an overview of the directory structure of this template. Only essential files are covered in this structure graph for simplicity. The directory structure is as follows:
73 |
74 | ```bash
75 | .
76 | ├── .azuredevops # CI pipelines for Azure DevOps. Details at section: How to Configure Azure DevOps CI Pipeline
77 | ├── .github # CI pipelines for Github Actions. Details at section: How to Configure Github Actions CI Pipeline
78 | ├── .pre-commit-config.yaml # pre-commit config file with formatting and linting. Setup is covered in Section: Getting Started
79 | ├── .env.example # Example of .env file. Setup is covered in Section: Getting Started
80 | ├── ci-tests.sh # Details at Section: Running all unit tests with ci-tests.sh
81 | ├── data # Directory to keep your data for local training etc. This directory is gitignored
82 | ├── notebooks # Setup process is covered in Section: How to setup dev environment?
83 | │ ├── .devcontainer # dev container related configuration files go here following VSCode convention
84 | │ │ ├── devcontainer.json # dev container configuration and VS Code settings, extensions etc.
85 | │ │ ├── Dockerfile # referred to in devcontainer.json
86 | │ │ └── requirements.txt # includes python package list for notebooks. used in Dockerfile
87 | │ └── sample_notebook.py # example of an interactive python script
88 | ├── pyproject.toml # Settings file for ruff, pytest and pytest-cov
89 | └── src
90 | ├── common # this module is accessible from all modules under src. put functions you want to import across the projects here
91 | │ └── requirements.txt # python package list for the common module. installed in all Dockerfiles under src. python tools for src go here too
92 | ├── sample_cpu_project # cpu project example. Setup process is covered in Section: How to setup dev environment?
93 | │ ├── .devcontainer # dev container related configuration files go here following VSCode convention
94 | │ │ ├── devcontainer.json # dev container configuration and VS Code settings, extensions etc.
95 | │ │ ├── Dockerfile # referred to in devcontainer.json. Supports only CPU
96 | │ │ └── requirements.txt # includes python package list for sample_cpu_project. used in Dockerfile
97 | │ ├── sample_main.py
98 | │ └── tests # pytest scripts for sample_cpu_project go here
99 | │ └── test_dummy.py # pytest script example
100 | └── sample_pytorch_gpu_project # gpu project example with pytorch. Setup process is covered in Section: How to setup dev environment?
101 | ├── README.md # README for AML example contained in sample_pytorch_gpu_project
102 | ├── .devcontainer # dev container related configuration files go here following VSCode convention
103 | │ ├── devcontainer.json # dev container configuration and VS Code settings, extensions etc.
104 | │ ├── Dockerfile # referred to in devcontainer.json.
Supports GPU
105 | │ └── requirements.txt # includes python package list for sample_pytorch_gpu_project. used in Dockerfile
106 | ├── aml_example/ # Sample AML CLI v2 Components-based pipeline, including setup YAML. See sample_pytorch_gpu_project/README for full details of files in this directory.
107 | ├── sample_main.py
108 | ├── inference.py # Example pytorch inference/eval script that also works with aml_example
109 | ├── train.py # Example pytorch model training script that also works with aml_example
110 | └── tests # pytest scripts for sample_pytorch_gpu_project go here
111 | └── test_dummy.py # pytest script example
112 | ```
113 |
114 | ### `notebooks` directory vs `src` directory
115 |
116 | There are two places to put python scripts/modules in this template. The `notebooks` directory is for experimental or throw-away python scripts and jupyter notebooks that you want to run cell by cell interactively. For example, EDA, one-off visualization code, and new model approaches you are not yet certain you want to maintain over time typically go in this directory. The `src` directory is for python scripts and modules that you want to reuse and maintain over time. The `src` directory is also where you would put unit tests (typically under a `src/your_module/tests` directory).
117 |
118 | Given the nature of each directory's responsibility, a different level of quality governance is also required for each. One big difference is that pre-commit hooks and CI pipelines run `ruff check` (linter) over `src` but not over `notebooks` (`ruff format` still runs for both). For scripts in `notebooks`, we recommend you use [interactive python scripts](https://code.visualstudio.com/docs/python/jupyter-support-py#_convert-jupyter-notebooks-to-python-code-file) where you can have jupyter-like code cells within `.py` files rather than jupyter notebooks `.ipynb`. Interactive python files give you the following benefits:
119 |
120 | - Full benefits of the python extension in VSCode such as code completion, linting, auto formatting, debugging etc
121 | - pre-commit hooks and CI pipelines will work as they run over `.py` files (but not perfectly over `.ipynb` files)
122 | - The Python file format is easier to review during a pull request review
123 |
124 | Interactive python scripts and jupyter notebooks are interchangeable as described in [Convert Jupyter notebooks to Python code file](https://code.visualstudio.com/docs/python/jupyter-support-py#_convert-jupyter-notebooks-to-python-code-file) so you can switch between them easily if you want to use both formats during development.
125 |
126 | ## AML Example
127 |
128 | An Azure Machine Learning (AML) example is provided under `src/sample_pytorch_gpu_project`. The example is an AML Components-based ML pipeline that runs a pytorch based training step followed by an inference/evaluation step. This example shows the seamless transition from a local run (inside the Dev Container) of pytorch based training/inference to running in the cloud in the exact same Docker environment with flexible compute options. See the [AML Components-based Pipeline Example README](src/sample_pytorch_gpu_project/README.md) for a detailed explanation of and instructions for the example code.
129 |
130 | ## CI Pipeline
131 |
132 | This repository contains templates for running a Continuous Integration (CI) pipeline on either Azure DevOps (under the `.azuredevops` directory) or on Github Actions (under the `.github` directory).
Each of the CI pipeline configurations provided has the following features at a high level:
133 |
134 | - Run code quality checks (`ruff check`) over the repository
135 | - Find all subdirectories under `src` and run all pytest tests inside the associated Docker containers
136 | - Publish test results and code coverage statistics
137 |
138 | We recommend setting up pipeline triggers for PR creation, editing and merging. This will ensure the pipeline runs continuously and will help catch any issues earlier in your development process.
139 |
140 | See the sections below for links on how to set up pipelines with [Azure DevOps](#how-to-configure-azure-devops-ci-pipeline) and [Github Actions](#how-to-configure-github-actions-ci-pipeline). Note that if you are only using one of these platforms to host a pipeline (or neither), you can safely delete either (or both) the `.azuredevops` directory or the `.github` directory.
141 |
142 | ### Running all unit tests with `ci-tests.sh`
143 |
144 | As multiple independent directories can be added under `src`, each with its own Dockerfile and requirements, running unit tests for each directory under `src` needs to be done within the Docker container of each `src` subdirectory. The `ci-tests.sh` script automates this task of running all unit tests for the repository with the following steps:
145 |
146 | 1. Finds all subdirectories under `src` that have at least one `test_*.py` under a `tests` folder
147 | 2. Builds a Docker image for each subdirectory with tests, using the Dockerfile in the associated `.devcontainer` directory
148 | 3. Runs pytest for each subdirectory with tests, inside the matching Docker container built in step 2
149 | 4. Combines all test results and coverage reports from step 3, with reports in a valid format for publishing in either an Azure DevOps or Github Actions hosted pipeline
150 |
151 | Note that the `ci-tests.sh` script can be run locally as well, and it is assumed that all tests are written with pytest.
152 |
153 | ### How to Configure Azure DevOps CI Pipeline
154 |
155 | See [create your first pipeline](https://learn.microsoft.com/en-us/azure/devops/pipelines/create-first-pipeline?view=azure-devops) for how to set up a pipeline in Azure DevOps. Note that to use the provided template in this repository, you will need to specify the path to `.azuredevops/ado-ci-pipeline-ms-hosted.yml` during the pipeline setup process in Azure DevOps.
156 |
157 | #### Choosing between Azure DevOps Microsoft-hosted vs Self-hosted CI Pipeline
158 |
159 | There are two templates for running a CI pipeline in Azure DevOps: a pipeline configuration that uses a Microsoft hosted agent to run the pipeline (`.azuredevops/ado-ci-pipeline-ms-hosted.yml`) and a pipeline configuration that uses a self-hosted agent to run the pipeline (`.azuredevops/ado-ci-pipeline-self-hosted.yml`).
160 |
161 | The Microsoft hosted version is easiest to start with and is recommended. You may consider switching to the self-hosted version when you have added several directories under `src` that have individual containers and the size of all the docker builds in the CI pipeline comes up against the 10GB disk storage limit for Microsoft hosted pipelines (see [resource limitations of Microsoft hosted agents](https://learn.microsoft.com/en-us/azure/devops/pipelines/agents/hosted?view=azure-devops&tabs=yaml#capabilities-and-limitations)).
In this case (or when other resource constraints are hit) switching to a self-hosted agent pipeline may be an option, and the template at `.azuredevops/ado-ci-pipeline-self-hosted.yml` includes additional steps to help manage the space consumed by CI pipeline runs. The two versions are otherwise identical in terms of building each docker container under `src`, running pytest within each of these containers and publishing test results and coverage information.
162 |
163 | ### How to Configure Github Actions CI Pipeline
164 |
165 | The Github Actions CI pipeline is defined in `.github/workflows/ci.yaml`. As long as this repository is hosted on Github, the pipeline will be automatically triggered when a PR is made or updated, as well as when a PR is merged into your main branch, with the setting below, so **no additional setup is required**.
166 |
167 | ```yaml
168 | on:
169 | push:
170 | branches:
171 | - main
172 | pull_request:
173 | branches:
174 | - main
175 | ```
176 |
177 | ## Using SSH Keys in Dev Containers
178 |
179 | If you have connected to the origin repository using SSH authentication, you will need to do a bit of setup to reuse your local SSH key inside a Dev Container automatically, which will allow you to interact with the origin repository (git push, git pull etc.) inside the Dev Container.
180 |
181 | 1. Try the recommendations in the official docs for [sharing git credentials](https://code.visualstudio.com/remote/advancedcontainers/sharing-git-credentials)
182 | 1. If the previous step doesn't work, try the method below, which includes a bit of additional code to add keys to the SSH agent.
183 |
184 | Add the following to your ~/.bash_profile, ~/.profile, ~/.zprofile or similar (by default most WSL users will have only a ~/.profile) so an ssh-agent will be started when needed and default keys will be added to the agent. The ssh-agent will then automatically forward keys to your Dev Container when it's launched.
185 |
186 | ```sh
187 | # this part taken from https://code.visualstudio.com/remote/advancedcontainers/sharing-git-credentials
188 | # check that link for the latest version or updates
189 | if [ -z "$SSH_AUTH_SOCK" ]; then
190 | # Check for a currently running instance of the agent
191 | RUNNING_AGENT="`ps -ax | grep 'ssh-agent -s' | grep -v grep | wc -l | tr -d '[:space:]'`"
192 | if [ "$RUNNING_AGENT" = "0" ]; then
193 | # Launch a new instance of the agent
194 | ssh-agent -s &> $HOME/.ssh/ssh-agent
195 | fi
196 | eval `cat $HOME/.ssh/ssh-agent`
197 | fi
198 |
199 | # ADD SSH Keys to the SSH agent
200 | # if using non-default SSH key, add it to ssh-add command like:
201 | # ssh-add /path/to/your/ssh-key
202 | ssh-add
203 | ```
204 |
205 | ## Future Roadmap
206 |
207 | - Add Docker build caching to Azure DevOps MS hosted CI pipeline
208 | - Investigate making `src/common` installable with `pip install -e`
209 |
210 | ## Contributing
211 |
212 | This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
213 |
214 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
215 | 216 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 217 | 218 | ## Trademarks 219 | 220 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 221 | 222 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 223 | 224 | Any use of third-party trademarks or logos are subject to those third-party's policies. 225 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. 
Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | ## Microsoft Support Policy 10 | 11 | Support for this project is limited to the resources listed above. 12 | -------------------------------------------------------------------------------- /ci-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | : ' 3 | This script will run all unit tests in the repository (for all directories under src/ that 4 | have at least one test_*.py under a tests folder). It will build a Docker image for each directory with tests, 5 | using the Dockerfile in the .devcontainer directory. It will then run pytest in the Docker container 6 | and save the test results and coverage report to the build artifacts directory. This script can be run 7 | locally or also in an ADO CI pipeline or Github Actions CI pipeline. See the 8 | .azuredevops/ado-ci-pipeline-ms-hosted.yml file for an example use in an ADO CI pipeline and the 9 | .github/workflows/ci.yaml for an example use in Github Actions pipeline. 10 | ' 11 | 12 | set -eE 13 | 14 | repo_root="$(pwd)" 15 | 16 | # Find all the 'src' subdirectories with a 'tests' folder, extract the dir name as test_dir_parent 17 | for test_dir_parent in $(find "${repo_root}/src" -type d -name 'tests' -exec dirname {} \; | sed "s|${repo_root}/src/||"); do 18 | # Check for at least one Python file in the 'tests' subdirectory of test_dir_parent 19 | count_test_py_files=$(find "${repo_root}/src/${test_dir_parent}/tests"/*.py 2>/dev/null | wc -l) 20 | if [ $count_test_py_files != 0 ]; then 21 | # Use the devcontainer Dockerfile to build a Docker image for the module to run tests 22 | docker build "${repo_root}" -f "${repo_root}/src/${test_dir_parent}/.devcontainer/Dockerfile" -t "${test_dir_parent}" 23 | 24 | echo "Running tests for ${test_dir_parent}, found ${count_test_py_files} test files" 25 | 26 | : ' 27 | Run the tests in the built Docker container, saving the test results and coverage report to /tmp/artifact_output. 28 | Some other key parts of the docker run command are explained here: 29 | - The local /tmp dir is mounted to docker /tmp so that there are no permission issues with the docker user and the 30 | pipeline user that runs this script and the user that accesses the test results and coverage report artifacts. 31 | - The --cov-append option tells pytest coverage to append the results to the existing coverage data, instead of 32 | overwriting it, this builds up coverage for each $test_dir_parent in a single coverage report for publishing. 
33 | - Set the .coverage location to be under /tmp so it is writable, coverage.py uses this file to store intermediate
34 | data while measuring code coverage across multiple test runs or when combining data from multiple sources.
35 | - exit with pytest exit code to ensure the script exits with a non-zero exit code if pytest fails, this ensures the CI
36 | pipeline in ADO fails if any tests fail.
37 | '
38 | docker run \
39 | -v "${repo_root}:/workspace" \
40 | -v "/tmp:/tmp" \
41 | --env test_dir_parent="$test_dir_parent" \
42 | --env COVERAGE_FILE=/tmp/artifact_output/.coverage \
43 | "${test_dir_parent}" \
44 | /bin/bash -ec '
45 | mkdir -p /tmp/artifact_output/$test_dir_parent; \
46 | env "PATH=$PATH" \
47 | env "PYTHONPATH=/workspace/src/$test_dir_parent:$PYTHONPATH" \
48 | pytest \
49 | --junitxml=/tmp/artifact_output/$test_dir_parent/test-results-$test_dir_parent.xml \
50 | -o junit_suite_name=$test_dir_parent \
51 | --doctest-modules \
52 | --cov \
53 | --cov-config=/workspace/pyproject.toml \
54 | --cov-report=xml:/tmp/artifact_output/coverage.xml \
55 | --cov-append \
56 | /workspace/src/$test_dir_parent; \
57 | exit $?'
58 | fi
59 | done
60 |
61 | : '
62 | If running CI on ADO with MS-hosted agents, copy the test and coverage results to the build artifacts directory
63 | so that it is preserved for publishing. See the .azuredevops/ado-ci-pipeline-ms-hosted.yml file for how the
64 | BUILD_ARTIFACTSTAGINGDIRECTORY is set.
65 | '
66 | if [ -n "$BUILD_ARTIFACTSTAGINGDIRECTORY" ]; then
67 | cp -r /tmp/artifact_output/* "${BUILD_ARTIFACTSTAGINGDIRECTORY}"
68 | fi
69 |
--------------------------------------------------------------------------------
/notebooks/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11.12
2 | # create non-root user and set the default user
3 | ARG USERNAME=devuser
4 | ARG USER_UID=1000
5 | ARG USER_GID=$USER_UID
6 | RUN groupadd --gid $USER_GID $USERNAME \
7 | && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
8 | # Add sudo support
9 | && apt-get update \
10 | && apt-get install -y sudo \
11 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
12 | && chmod 0440 /etc/sudoers.d/$USERNAME \
13 | && rm -rf /var/lib/apt/lists/*
14 | USER $USERNAME
15 |
16 | # make all python tools installed by pip accessible
17 | ENV PATH=$PATH:/home/$USERNAME/.local/bin
18 | RUN pip install --no-cache-dir pip --upgrade
19 | COPY requirements-dev.txt .
20 | RUN pip install --no-cache-dir -r requirements-dev.txt
21 |
22 | # install notebooks related dependencies
23 | COPY notebooks/.devcontainer/requirements.txt .
24 | RUN pip install --no-cache-dir -r requirements.txt 25 | -------------------------------------------------------------------------------- /notebooks/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | // use root directory as build context so that requirements-dev.txt is accessible during build 4 | "context": "../../", 5 | "dockerfile": "Dockerfile" 6 | }, 7 | "shutdownAction": "none", 8 | "features": { 9 | "ghcr.io/devcontainers/features/common-utils:2": { 10 | "installZsh": true, 11 | "configureZshAsDefaultShell": true, 12 | "installOhMyZsh": true, 13 | "upgradePackages": false, 14 | "username": "devuser", 15 | }, 16 | }, 17 | "runArgs": [ 18 | "--env-file", 19 | "../.env" 20 | ], 21 | "postCreateCommand": "pre-commit install --overwrite", 22 | "customizations": { 23 | "vscode": { 24 | "extensions": [ 25 | "codezombiech.gitignore", 26 | "DavidAnson.vscode-markdownlint", 27 | "donjayamanne.githistory", 28 | "donjayamanne.python-environment-manager", 29 | "donjayamanne.vscode-default-python-kernel", 30 | "eamodio.gitlens", 31 | "GitHub.copilot", 32 | "github.copilot-chat", 33 | "Gruntfuggly.todo-tree", 34 | "ionutvmi.path-autocomplete", 35 | "marchiore.csvtomarkdown", 36 | "mechatroner.rainbow-csv", 37 | "ms-azure-devops.azure-pipelines", 38 | "ms-python.mypy-type-checker", 39 | "ms-python.python", 40 | "ms-toolsai.jupyter", 41 | "ms-vsliveshare.vsliveshare", 42 | "njpwerner.autodocstring", 43 | "redhat.vscode-yaml", 44 | "streetsidesoftware.code-spell-checker", 45 | "timonwong.shellcheck", 46 | "charliermarsh.ruff", 47 | "grapecity.gc-excelviewer" 48 | ], 49 | "settings": { 50 | "autoDocstring.docstringFormat": "google", 51 | "mypy-type-checker.importStrategy": "fromEnvironment", 52 | "python.testing.pytestEnabled": true, 53 | "python.defaultInterpreterPath": "/usr/local/bin/python", 54 | "[python]": { 55 | "editor.codeActionsOnSave": { 56 | "source.fixAll": "explicit", 57 | "source.organizeImports": "explicit" 58 | }, 59 | "editor.defaultFormatter": "charliermarsh.ruff", 60 | "editor.formatOnSave": true, 61 | "files.trimTrailingWhitespace": true 62 | }, 63 | "notebook.formatOnSave.enabled": true, 64 | "notebook.codeActionsOnSave": { 65 | "notebook.source.fixAll": "explicit", 66 | "notebook.source.organizeImports": "explicit" 67 | } 68 | }, 69 | } 70 | }, 71 | } 72 | -------------------------------------------------------------------------------- /notebooks/.devcontainer/requirements.txt: -------------------------------------------------------------------------------- 1 | # notebooks specific requirements 2 | ipykernel==6.29.5 3 | nbconvert==7.16.6 4 | nbformat==5.10.4 5 | -------------------------------------------------------------------------------- /notebooks/sample_notebook.py: -------------------------------------------------------------------------------- 1 | # %% 2 | print("a") 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | target-version = "py311" 3 | extend-exclude = ["notebooks"] 4 | 5 | [tool.ruff.lint] 6 | # On top of the default `select` (`E`, `F`), 7 | # enable 8 | # - flake8-bugbear (`B`) 9 | # - flake8-bandit (S) 10 | # - isort (I) 11 | # - pep8-naming (N) 12 | select = ["E", "F", "B", "S", "I", "N"] 13 | 14 | 15 | [tool.ruff.lint.per-file-ignores] 16 | "**/tests/**/test_*.py" = [ 17 | "S101", # asserts allowed in tests 18 | ] 19 
| "**/*.ipynb" = [ 20 | "B018", # allow notebooks printing out variables in the mid cell with variable names only 21 | ] 22 | 23 | [tool.pytest.ini_options] 24 | pythonpath = "src" 25 | 26 | [tool.coverage.run] 27 | omit = [ 28 | # ignore all notebooks in src 29 | "*/notebooks/*", 30 | # ignore all tests in src 31 | "*/tests/*", 32 | ] 33 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | mypy==1.15.0 2 | pytest==8.3.5 3 | pre-commit==4.2.0 4 | pytest-cov==6.1.1 5 | ruff==0.11.8 6 | -------------------------------------------------------------------------------- /src/.amlignore: -------------------------------------------------------------------------------- 1 | **/outputs 2 | **/data 3 | **/logs 4 | **/tests 5 | **/__pycache__ 6 | **/.mypy_cache 7 | **/.pytest_cache 8 | **/.vscode 9 | **/junit 10 | **/.azuredevops 11 | **/.venv 12 | **/venv 13 | **/*.md 14 | **/train_artifacts 15 | **/mlruns -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/__init__.py -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/common/__init__.py -------------------------------------------------------------------------------- /src/common/requirements.txt: -------------------------------------------------------------------------------- 1 | # libraries for common modules 2 | ipykernel==6.29.5 3 | -------------------------------------------------------------------------------- /src/sample_cpu_project/.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.12 2 | # create non-root user and set the default user 3 | ARG USERNAME=devuser 4 | ARG USER_UID=1000 5 | ARG USER_GID=$USER_UID 6 | RUN groupadd --gid $USER_GID $USERNAME \ 7 | && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ 8 | # Add sudo support 9 | && apt-get update \ 10 | && apt-get install -y sudo \ 11 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ 12 | && chmod 0440 /etc/sudoers.d/$USERNAME \ 13 | && rm -rf /var/lib/apt/lists/* 14 | USER $USERNAME 15 | 16 | # make all python tools installed by pip accesible 17 | ENV PATH=$PATH:/home/devuser/.local/bin 18 | 19 | RUN pip install --no-cache-dir pip --upgrade 20 | 21 | COPY src/sample_cpu_project/.devcontainer/requirements.txt . 22 | RUN pip install --no-cache-dir -r requirements.txt 23 | 24 | # install common module related pacakages 25 | COPY src/common/requirements.txt . 26 | RUN pip install --no-cache-dir -r requirements.txt 27 | 28 | # install python tools 29 | COPY requirements-dev.txt . 
30 | RUN pip install --no-cache-dir -r requirements-dev.txt 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/sample_cpu_project/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | // use root directory as build context so that requirements-dev.txt is accessible during build 4 | "context": "../../../", 5 | "dockerfile": "Dockerfile" 6 | }, 7 | "shutdownAction": "none", 8 | "features": { 9 | "ghcr.io/devcontainers/features/common-utils:2": { 10 | "installZsh": true, 11 | "configureZshAsDefaultShell": true, 12 | "installOhMyZsh": true, 13 | "upgradePackages": false, 14 | "username": "devuser", 15 | }, 16 | }, 17 | "runArgs": [ 18 | "--env-file", 19 | "../../.env" 20 | ], 21 | "postCreateCommand": "pre-commit install --overwrite", 22 | "customizations": { 23 | "vscode": { 24 | "extensions": [ 25 | "codezombiech.gitignore", 26 | "DavidAnson.vscode-markdownlint", 27 | "donjayamanne.githistory", 28 | "donjayamanne.python-environment-manager", 29 | "donjayamanne.vscode-default-python-kernel", 30 | "eamodio.gitlens", 31 | "GitHub.copilot", 32 | "github.copilot-chat", 33 | "Gruntfuggly.todo-tree", 34 | "ionutvmi.path-autocomplete", 35 | "marchiore.csvtomarkdown", 36 | "mechatroner.rainbow-csv", 37 | "ms-azure-devops.azure-pipelines", 38 | "ms-python.mypy-type-checker", 39 | "ms-python.python", 40 | "ms-toolsai.jupyter", 41 | "ms-vsliveshare.vsliveshare", 42 | "njpwerner.autodocstring", 43 | "redhat.vscode-yaml", 44 | "streetsidesoftware.code-spell-checker", 45 | "timonwong.shellcheck", 46 | "charliermarsh.ruff", 47 | "grapecity.gc-excelviewer" 48 | ], 49 | "settings": { 50 | "autoDocstring.docstringFormat": "google", 51 | "mypy-type-checker.importStrategy": "fromEnvironment", 52 | "python.testing.pytestEnabled": true, 53 | "python.defaultInterpreterPath": "/usr/local/bin/python", 54 | "[python]": { 55 | "editor.codeActionsOnSave": { 56 | "source.fixAll": "explicit", 57 | "source.organizeImports.ruff": "explicit" 58 | }, 59 | "editor.defaultFormatter": "charliermarsh.ruff", 60 | "editor.formatOnSave": true, 61 | "files.trimTrailingWhitespace": true 62 | }, 63 | "notebook.formatOnSave.enabled": true, 64 | "notebook.codeActionsOnSave": { 65 | "notebook.source.fixAll": "explicit", 66 | "notebook.source.organizeImports": "explicit" 67 | }, 68 | }, 69 | } 70 | }, 71 | } 72 | -------------------------------------------------------------------------------- /src/sample_cpu_project/.devcontainer/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/sample_cpu_project/.devcontainer/requirements.txt -------------------------------------------------------------------------------- /src/sample_cpu_project/sample_main.py: -------------------------------------------------------------------------------- 1 | def main(x: int, y: int): 2 | return x + y 3 | -------------------------------------------------------------------------------- /src/sample_cpu_project/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/sample_cpu_project/tests/.gitkeep -------------------------------------------------------------------------------- /src/sample_cpu_project/tests/test_dummy.py: 
-------------------------------------------------------------------------------- 1 | from sample_cpu_project import sample_main 2 | 3 | 4 | def test_main(): 5 | assert sample_main.main(1, 2) == 3 6 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.amlignore: -------------------------------------------------------------------------------- 1 | **/outputs 2 | **/data 3 | **/logs 4 | **/tests 5 | **/__pycache__ 6 | **/.mypy_cache 7 | **/.pytest_cache 8 | **/.vscode 9 | **/junit 10 | **/.azuredevops 11 | **/.venv 12 | **/venv 13 | **/*.md 14 | **/train_artifacts -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.12 2 | # install Azure CLI 3 | RUN wget -qO- https://aka.ms/InstallAzureCLIDeb | bash 4 | # create non-root user and set the default user 5 | ARG USERNAME=devuser 6 | ARG USER_UID=1000 7 | ARG USER_GID=$USER_UID 8 | RUN groupadd --gid $USER_GID $USERNAME \ 9 | && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ 10 | # Add sudo support 11 | && apt-get update \ 12 | && apt-get install -y sudo \ 13 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ 14 | && chmod 0440 /etc/sudoers.d/$USERNAME \ 15 | && rm -rf /var/lib/apt/lists/* 16 | USER $USERNAME 17 | 18 | 19 | # make all python tools installed by pip accessible 20 | ENV PATH=$PATH:/home/$USERNAME/.local/bin 21 | 22 | RUN pip install pip --upgrade 23 | 24 | COPY src/sample_pytorch_gpu_project/.devcontainer/requirements.txt . 25 | RUN pip install --no-cache-dir -r requirements.txt 26 | 27 | # install common module related packages 28 | COPY src/common/requirements.txt . 29 | RUN pip install --no-cache-dir -r requirements.txt 30 | 31 | # install python tools 32 | COPY requirements-dev.txt .
33 | RUN pip install --no-cache-dir -r requirements-dev.txt 34 | 35 | RUN az extension add --name ml 36 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | // use root directory as build context so that requirements-dev.txt is accessible during build 4 | "context": "../../../", 5 | "dockerfile": "Dockerfile" 6 | }, 7 | "shutdownAction": "none", 8 | "features": { 9 | "ghcr.io/devcontainers/features/common-utils:2": { 10 | "installZsh": true, 11 | "configureZshAsDefaultShell": true, 12 | "installOhMyZsh": true, 13 | "upgradePackages": false, 14 | "username": "devuser", 15 | }, 16 | }, 17 | "runArgs": [ 18 | "--gpus", 19 | "all", 20 | "--env-file", 21 | "../../.env" 22 | ], 23 | "postCreateCommand": "pre-commit install --overwrite", 24 | "customizations": { 25 | "vscode": { 26 | "extensions": [ 27 | "codezombiech.gitignore", 28 | "DavidAnson.vscode-markdownlint", 29 | "donjayamanne.githistory", 30 | "donjayamanne.python-environment-manager", 31 | "donjayamanne.vscode-default-python-kernel", 32 | "eamodio.gitlens", 33 | "GitHub.copilot", 34 | "github.copilot-chat", 35 | "Gruntfuggly.todo-tree", 36 | "ionutvmi.path-autocomplete", 37 | "marchiore.csvtomarkdown", 38 | "mechatroner.rainbow-csv", 39 | "ms-azure-devops.azure-pipelines", 40 | "ms-python.mypy-type-checker", 41 | "ms-python.python", 42 | "ms-toolsai.jupyter", 43 | "ms-vsliveshare.vsliveshare", 44 | "njpwerner.autodocstring", 45 | "redhat.vscode-yaml", 46 | "streetsidesoftware.code-spell-checker", 47 | "timonwong.shellcheck", 48 | "charliermarsh.ruff", 49 | "grapecity.gc-excelviewer" 50 | ], 51 | "settings": { 52 | "autoDocstring.docstringFormat": "google", 53 | "mypy-type-checker.importStrategy": "fromEnvironment", 54 | "python.testing.pytestEnabled": true, 55 | "python.defaultInterpreterPath": "/usr/local/bin/python", 56 | "[python]": { 57 | "editor.codeActionsOnSave": { 58 | "source.fixAll": "explicit", 59 | "source.organizeImports": "explicit" 60 | }, 61 | "editor.defaultFormatter": "charliermarsh.ruff", 62 | "editor.formatOnSave": true, 63 | "files.trimTrailingWhitespace": true 64 | }, 65 | "notebook.formatOnSave.enabled": true, 66 | "notebook.codeActionsOnSave": { 67 | "notebook.source.fixAll": "explicit", 68 | "notebook.source.organizeImports": "explicit" 69 | } 70 | }, 71 | } 72 | }, 73 | } 74 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.devcontainer/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.7.0 2 | torchvision==0.22.0 3 | # the below are used by AML, can be deleted if not using AML 4 | mlflow==2.21.3 5 | azureml-mlflow==1.60.0 6 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/.gitignore: -------------------------------------------------------------------------------- 1 | mlruns 2 | outputs -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/README.md: -------------------------------------------------------------------------------- 1 | # AML Components-based Pipeline Example 2 | 3 | ## Introduction 4 | 5 | This subdirectory contains a configured and tested lightweight Azure Machine Learning (AML) CLI v2 components-based ML pipeline example.
Read more about [AML components-based pipelines](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-component-pipelines-cli?view=azureml-api-2). This example allows you to seamlessly move from working in the local Dev Container environment to a cloud-based environment with the exact same Dockerfile. 6 | 7 | Two example files are provided, `train.py` and `inference.py`, which contain a pytorch example (taken from [this pytorch tutorial](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html?highlight=cifar10)) for training a small conv-net on CIFAR10 and performing inference and evaluation with the trained model. These files are also wrapped in AML components (`aml_example/aml_components/train-component.yaml` and `aml_example/aml_components/inference-component.yaml`), which are then composed into an AML components-based pipeline in `aml_example/sample-aml-components-pipeline.yml`. The example can thus be run locally inside the Dev Container or in the cloud in AML, with the exact same environment. See the sections below for [how to set up](#setting-up-aml-for-running-a-pipeline) and [run the example in AML](#run-the-aml-component-example). 8 | 9 | As an example workflow, you could work with the sample `train.py` and `inference.py` with your local CPU/GPU to get things working and then easily transition to running the same scripts in an AML cloud environment that could have a more powerful GPU. 10 | 11 | ## Setting up AML for running a pipeline 12 | 13 | The sections below go through the setup required for running the AML pipeline-components example. 14 | 15 | ### 1. Dev Container Setup 16 | 17 | Ensure you have run through the [project setup steps outlined](../../README.md#getting-started) in the top-level README. When going through these steps you could also add the AML environment variables to the `.env` file to avoid referring to them in each CLI command. The `.env` file contains commented-out names of the required variables. 18 | 19 | ### 2. Azure prerequisites 20 | 21 | Before you try out the AML example, you will need to set up an Azure account with the following: 22 | 23 | - If you don't have an Azure subscription, create a free account before you begin. Try the [free or paid version of Azure Machine Learning](https://azure.microsoft.com/free/). 24 | 25 | - An Azure Machine Learning workspace. [Create workspace resources](https://learn.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources?view=azureml-api-2). 26 | 27 | ### 3. Using the AML CLI v2 28 | 29 | The Dev Container environment comes configured with the Azure CLI and the AML CLI v2 extension. See [how to configure the AML CLI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-configure-cli?view=azureml-api-2&tabs=public) for background information. 30 | 31 | With your Dev Container launched for `src/sample_pytorch_gpu_project`, verify that the AML CLI v2 extension is installed with: 32 | 33 | ```bash 34 | az ml 35 | ``` 36 | 37 | If this runs, then log in to your Azure account with: 38 | 39 | ```bash 40 | az login 41 | ``` 42 | 43 | Alternatively, you may need to specify the specific tenant that contains the subscription and workspace you will be running AML jobs in: 44 | 45 | ```bash 46 | az login --use-device-code --tenant <tenant-id> 47 | ``` 48 | 49 | Note that to avoid manually specifying `-g <resource-group> -w <workspace-name>` in the `az` commands below, you can place these values in your `.env` file in the root of the repository (not tracked by git). You will need to relaunch the Dev Container after adding these.
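Alternatively, the workspace details can be registered as Azure CLI defaults so they do not have to be repeated on every command. The snippet below is a minimal sketch (not part of this repository's setup scripts); the placeholder values are assumptions to replace with your own:

```bash
# optional: set Azure CLI defaults so -g/-w can be omitted from later az ml commands
az account set --subscription <subscription-id>
az configure --defaults group=<resource-group> workspace=<workspace-name>
```

With these defaults in place, the `-g`/`-w` flags on the `az ml` commands in the following sections can usually be dropped.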
50 | 51 | ### 4. Set up AML Compute and Docker Environment 52 | 53 | After logging into the AML CLI, you will need to set up an AML compute cluster and an AML custom environment to run the train.py and inference.py scripts in the AML components pipeline example (`aml_example/sample-aml-components-pipeline.yml`). 54 | 55 | ### 4a. Set up the AML compute cluster 56 | 57 | There are two options provided to set up compute clusters, a GPU cluster (using `aml_example/aml_setup/create-gpu-compute.yaml`) and a CPU cluster (using `aml_example/aml_setup/create-cpu-compute.yaml`). To run the example we will just create the GPU cluster, but in the future you may create both GPU and CPU clusters and then use a mix of compute across different types of scripts (e.g. GPU for training and CPU for an evaluation script). 58 | 59 | 1. First update the `location` and `size` parameters in `aml_example/aml_setup/create-gpu-compute.yaml` to match the requirements for your subscription and AML workspace: 60 | 61 | ```yaml 62 | size: Standard_NC6 63 | location: centralus 64 | ``` 65 | 66 | 2. Create the compute cluster from the command line inside the Dev Container: 67 | 68 | ```bash 69 | az ml compute create -f aml_example/aml_setup/create-gpu-compute.yaml -g <resource-group> -w <workspace-name> 70 | ``` 71 | 72 | ### 4b. Set up the AML custom environment 73 | 74 | We will use the exact same Dockerfile that specifies the Dev Container and local running environment for running jobs in AML so that there is a seamless transition to the cloud. 75 | 76 | Create the custom AML environment from the command line inside the Dev Container: 77 | 78 | ```bash 79 | az ml environment create --file aml_example/aml_setup/create-env.yaml -g <resource-group> -w <workspace-name> 80 | ``` 81 | 82 | #### **Updating the AML Custom Environment** 83 | 84 | Note that the AML environment will need to be updated manually any time new dependencies are added to `.devcontainer/requirements.txt` or `.devcontainer/Dockerfile` is updated. Also, if you add new dependencies in `src/common/requirements.txt` that are needed in `src/sample_pytorch_gpu_project`, then this will also require an environment rebuild. The environment can be rebuilt by running the exact same command used above to create the environment. 85 | 86 | ## Run the AML Component Example 87 | 88 | After going through the [setup steps](#setting-up-aml-for-running-a-pipeline), you can run the AML components pipeline example `aml_example/sample-aml-components-pipeline.yml`, which will run `train.py` and `inference.py` in sequence, with the trained model passed between the steps by the pipeline.
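The command in the next step starts the pipeline with the input defaults defined in the YAML. As an optional sketch (not part of the original setup steps), individual inputs such as the training batch size can also be overridden at submission time with `--set`; the property path below is an assumption based on the `jobs.train_component.inputs.batch_size` field in `sample-aml-components-pipeline.yml`:

```bash
# optional sketch: override the batch_size pipeline input at submission time
az ml job create -f aml_example/sample-aml-components-pipeline.yml --web \
  --set jobs.train_component.inputs.batch_size=8 \
  -g <resource-group> -w <workspace-name>
```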
89 | 90 | Start the pipeline experiment from the command line inside the Dev Container: 91 | 92 | ```bash 93 | az ml job create -f aml_example/sample-aml-components-pipeline.yml --web -g <resource-group> -w <workspace-name> 94 | ``` 95 | 96 | ## Explanation of AML Files 97 | 98 | ```bash 99 | src/sample_pytorch_gpu_project/ 100 | ├── README.md 101 | ├── aml_example # Contains all AML related files 102 | │ ├── aml_components # AML component files that are used in sample-aml-components-pipeline.yml 103 | │ │ ├── inference-component.yaml # AML CLI v2 inference component that wraps inference.py 104 | │ │ └── train-component.yaml # AML CLI v2 training component that wraps train.py 105 | │ ├── aml_setup # AML workspace setup files 106 | │ │ ├── create-cpu-compute.yaml # Create AML CPU cluster 107 | │ │ ├── create-env.yaml # Create AML custom Docker environment 108 | │ │ └── create-gpu-compute.yaml # Create AML GPU cluster 109 | │ └── sample-aml-components-pipeline.yml # Sample AML CLI v2 components pipeline that refers to aml_components/inference-component.yaml and aml_components/train-component.yaml 110 | ├── inference.py # Example of pytorch model inference (from a trained model from train.py) 111 | ├── sample_main.py # Sample function used by unit tests 112 | ├── tests 113 | │ └── test_dummy.py # Sample pytest that calls function from sample_main.py 114 | └── train.py # Example of pytorch model training, can be run locally or in AML job 115 | 116 | ``` 117 | 118 | ## How to delete all AML dependencies and source files 119 | 120 | If you don't need to use any of the sample AML integrations, follow the steps below to remove all dependencies and related source files. 121 | 122 | 1. In `.devcontainer/Dockerfile`, remove the following lines: 123 | 124 | ```bash 125 | RUN wget -qO- https://aka.ms/InstallAzureCLIDeb | bash 126 | ``` 127 | 128 | and 129 | 130 | ```bash 131 | RUN az extension add --name ml 132 | ``` 133 | 134 | 2. Remove the `mlflow` dependencies in `.devcontainer/requirements.txt`: 135 | 136 | ```txt 137 | mlflow==2.21.3 138 | azureml-mlflow==1.60.0 139 | ``` 140 | 141 | Note that you could keep the `mlflow` dependency if you want to keep `train.py` and `inference.py` for local runs with `mlflow` logging. 142 | 143 | 3. Delete the entire `aml_example` directory. 144 | 145 | ```bash 146 | cd /workspace/src/sample_pytorch_gpu_project 147 | rm -rf aml_example 148 | ``` 149 | 150 | 4. [Optional] Delete `train.py` and `inference.py`, which are included as examples to work with the AML pipeline components. You could also retain these samples as an example of working with pytorch locally. 151 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_components/inference-component.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | type: command 3 | 4 | display_name: pytorch-inference-component 5 | name: pytorch_inference_component 6 | description: Get best model from training run and do inference on the test set.
7 | version: 1 8 | 9 | code: /workspace/src 10 | command: >- 11 | python sample_pytorch_gpu_project/inference.py --train_artifacts_dir ${{inputs.train_artifacts_dir}} --preds_dir ${{outputs.test_set_preds_dir}} 12 | inputs: 13 | train_artifacts_dir: 14 | type: uri_folder 15 | outputs: 16 | test_set_preds_dir: 17 | type: uri_folder 18 | environment: azureml:pytorch-gpu-env@latest # should match name used in aml_setup/create-env.yaml -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_components/train-component.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | type: command 3 | 4 | display_name: pytorch-train-component 5 | name: pytorch_train_component 6 | description: Train a Pytorch model. 7 | version: 1 8 | 9 | code: /workspace/src 10 | command: >- 11 | python sample_pytorch_gpu_project/train.py --train_artifacts_dir ${{outputs.train_artifacts_dir}} --batch_size ${{inputs.batch_size}} 12 | inputs: 13 | batch_size: 14 | type: integer 15 | default: 4 16 | outputs: 17 | train_artifacts_dir: 18 | type: uri_folder 19 | environment: azureml:pytorch-gpu-env@latest # should match name used in aml_setup/create-env.yaml -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_setup/create-cpu-compute.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/amlCompute.schema.json 2 | name: pytorch-cpu-cluster 3 | type: amlcompute 4 | size: Standard_DS3_v2 5 | min_instances: 0 6 | max_instances: 1 7 | idle_time_before_scale_down: 120 8 | location: centralus -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_setup/create-env.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | # https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-environments-v2?tabs=cli 3 | name: pytorch-gpu-env 4 | build: 5 | path: /workspace/src # context at this level to include src/common requirements 6 | dockerfile_path: sample_pytorch_gpu_project/.devcontainer/Dockerfile -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/aml_setup/create-gpu-compute.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/amlCompute.schema.json 2 | name: pytorch-gpu-cluster 3 | type: amlcompute 4 | size: Standard_NC6 5 | min_instances: 0 6 | max_instances: 1 7 | idle_time_before_scale_down: 120 8 | location: centralus -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/aml_example/sample-aml-components-pipeline.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json 2 | type: pipeline 3 | 4 | display_name: sample_pytorch_gpu_pipeline_run # change this name each run to be descriptive 5 | experiment_name: sample_pytorch_gpu_pipeline 6 | description: Pytorch model training, inference, evaluation. 
7 | 8 | jobs: 9 | train_component: 10 | type: command 11 | component: /workspace/src/sample_pytorch_gpu_project/aml_example/aml_components/train-component.yaml 12 | compute: azureml:pytorch-gpu-cluster 13 | inputs: 14 | batch_size: 4 15 | outputs: 16 | train_artifacts_dir: 17 | type: uri_folder 18 | mode: rw_mount 19 | inference_component: 20 | type: command 21 | component: /workspace/src/sample_pytorch_gpu_project/aml_example/aml_components/inference-component.yaml 22 | compute: azureml:pytorch-gpu-cluster 23 | inputs: 24 | train_artifacts_dir: ${{parent.jobs.train_component.outputs.train_artifacts_dir}} 25 | outputs: 26 | test_set_preds_dir: 27 | type: uri_folder 28 | mode: rw_mount 29 | identity: 30 | # use with a managed identity assigned to compute cluster 31 | # type: managed_identity 32 | # use with user identity that is logged in to AML CLI 33 | type: user_identity -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | import mlflow 5 | import pandas as pd 6 | import torch 7 | import torchvision 8 | import torchvision.transforms as transforms 9 | from train import Net 10 | 11 | 12 | def main(args): 13 | # keep this setup code 14 | print("\n".join(f"{k}: {v}" for k, v in sorted(dict(vars(args)).items()))) 15 | dict_args = vars(args) 16 | mlflow.autolog() 17 | mlflow.log_params(dict_args) 18 | 19 | # code below this comment is a sample only, replace with your own training code 20 | net = Net() 21 | net.load_state_dict(torch.load(args.train_artifacts_dir / "cifar_net.pth")) 22 | 23 | # transforms.Normalize() uses Imagenet means and stds 24 | transform = transforms.Compose( 25 | [ 26 | transforms.ToTensor(), 27 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 28 | ] 29 | ) 30 | testset = torchvision.datasets.CIFAR10( 31 | root="./data", train=False, download=True, transform=transform 32 | ) 33 | testloader = torch.utils.data.DataLoader( 34 | testset, batch_size=2, shuffle=False, num_workers=2 35 | ) 36 | 37 | correct = 0 38 | total = 0 39 | combined_predictions = [] 40 | combined_labels = [] 41 | # since we're not training, we don't need to calculate the gradients for our outputs 42 | with torch.no_grad(): 43 | for data in testloader: 44 | images, labels = data 45 | # calculate outputs by running images through the network 46 | outputs = net(images) 47 | # the class with the highest energy is what we choose as prediction 48 | predicted = torch.argmax(outputs.detach(), 1) 49 | combined_predictions.extend(predicted.tolist()) 50 | combined_labels.extend(labels.tolist()) 51 | total += labels.size(0) 52 | correct += (predicted == labels).sum().item() 53 | 54 | accuracy = correct / total 55 | print( 56 | f"Accuracy of the network on the 10000 test images: {100 * accuracy // 1.0} %" 57 | ) 58 | mlflow.log_metric("test_accuracy", accuracy) 59 | 60 | # save predictions CSV to output directory 61 | df_preds = pd.DataFrame( 62 | {"label": combined_labels, "prediction": combined_predictions} 63 | ) 64 | df_preds.to_csv(args.preds_dir / "preds.csv", index=False) 65 | 66 | 67 | if __name__ == "__main__": 68 | parser = argparse.ArgumentParser() 69 | parser.add_argument( 70 | "--train_artifacts_dir", 71 | type=Path, 72 | help="Directory where trained model is saved", 73 | default=Path("outputs"), 74 | ) 75 | parser.add_argument( 76 | "--preds_dir", 77 | type=Path, 
78 | help="Output folder containing test set predictions CSV file (preds.csv)", 79 | default=Path("outputs"), 80 | ) 81 | args = parser.parse_args() 82 | main(args) 83 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/sample_main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def add(x: int, y: int): 5 | return x + y 6 | 7 | 8 | def main(): 9 | print("torch.cuda.is_available():", torch.cuda.is_available()) 10 | print("torch.cuda.device_count():", torch.cuda.device_count()) 11 | print("torch.backends.mkl.is_available():", torch.backends.mkl.is_available()) 12 | print("torch.backends.cudnn.is_available():", torch.backends.cudnn.is_available()) 13 | print("torch.backends.cuda.is_built():", torch.backends.cuda.is_built()) 14 | print("torch.backends.mkldnn.is_available():", torch.backends.mkldnn.is_available()) 15 | print("torch.version.cuda:", torch.version.cuda) 16 | print("torch.backends.cudnn.version():", torch.backends.cudnn.version()) 17 | 18 | 19 | if __name__ == "__main__": 20 | main() 21 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/dstoolkit-devcontainers/1561dffab6598420f486138ad9f3ae69a100faf6/src/sample_pytorch_gpu_project/tests/.gitkeep -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/tests/test_dummy.py: -------------------------------------------------------------------------------- 1 | from sample_pytorch_gpu_project import sample_main 2 | 3 | 4 | def test_main(): 5 | sample_main.main() 6 | 7 | 8 | def test_add(): 9 | assert sample_main.add(1, 2) == 3 10 | -------------------------------------------------------------------------------- /src/sample_pytorch_gpu_project/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | import mlflow 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F # noqa: N812 8 | import torch.optim as optim 9 | import torchvision 10 | import torchvision.transforms as transforms 11 | 12 | 13 | # Example model, delete or replace with your own 14 | class Net(nn.Module): 15 | def __init__(self): 16 | super().__init__() 17 | self.conv1 = nn.Conv2d(3, 6, 5) 18 | self.pool = nn.MaxPool2d(2, 2) 19 | self.conv2 = nn.Conv2d(6, 16, 5) 20 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 21 | self.fc2 = nn.Linear(120, 84) 22 | self.fc3 = nn.Linear(84, 10) 23 | 24 | def forward(self, x): 25 | x = self.pool(F.relu(self.conv1(x))) 26 | x = self.pool(F.relu(self.conv2(x))) 27 | x = torch.flatten(x, 1) # flatten all dimensions except batch 28 | x = F.relu(self.fc1(x)) 29 | x = F.relu(self.fc2(x)) 30 | x = self.fc3(x) 31 | return x 32 | 33 | 34 | def main(args): 35 | # keep this setup code 36 | print("\n".join(f"{k}: {v}" for k, v in sorted(dict(vars(args)).items()))) 37 | dict_args = vars(args) 38 | args.train_artifacts_dir.mkdir(parents=True, exist_ok=True) 39 | mlflow.autolog() 40 | mlflow.log_params(dict_args) 41 | 42 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 43 | print(device) 44 | 45 | # code below this comment is a sample only, replace with your own training code 46 | 47 | # transforms.Normalize() uses Imagenet means and stds 48 
| transform = transforms.Compose( 49 | [ 50 | transforms.ToTensor(), 51 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 52 | ] 53 | ) 54 | 55 | trainset = torchvision.datasets.CIFAR10( 56 | root="./data", train=True, download=True, transform=transform 57 | ) 58 | trainloader = torch.utils.data.DataLoader( 59 | trainset, batch_size=args.batch_size, shuffle=True, num_workers=2 60 | ) 61 | 62 | net = Net() 63 | net.to(device) 64 | criterion = nn.CrossEntropyLoss() 65 | optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) 66 | 67 | for epoch in range(2): # loop over the dataset multiple times 68 | mini_batch_loss = 0.0 69 | for i, data in enumerate(trainloader): 70 | # get the inputs; data is a list of [inputs, labels] 71 | inputs, labels = data[0].to(device), data[1].to(device) 72 | 73 | # zero the parameter gradients 74 | optimizer.zero_grad() 75 | 76 | # forward + backward + optimize 77 | outputs = net(inputs) 78 | loss = criterion(outputs, labels) 79 | loss.backward() 80 | optimizer.step() 81 | 82 | # print statistics 83 | mini_batch_loss += loss.item() 84 | if (i + 1) % 2000 == 0: # print every 2000 mini-batches 85 | print(f"[{epoch + 1}, {i + 1:5d}] loss: {mini_batch_loss / 2000:.3f}") 86 | mlflow.log_metric( 87 | "Training Loss", 88 | mini_batch_loss / 2000, 89 | step=i + (epoch * len(trainloader)), 90 | ) 91 | mini_batch_loss = 0.0 92 | 93 | print("Finished Training") 94 | 95 | # save model 96 | torch.save(net.state_dict(), args.train_artifacts_dir / "cifar_net.pth") 97 | print(f"Model saved to {args.train_artifacts_dir / 'cifar_net.pth'}") 98 | 99 | 100 | if __name__ == "__main__": 101 | parser = argparse.ArgumentParser() 102 | parser.add_argument( 103 | "--train_artifacts_dir", 104 | type=Path, 105 | help="output directory where trained model, checkpoints etc are saved", 106 | default=Path("outputs"), 107 | ) 108 | parser.add_argument( 109 | "--batch_size", type=int, help="the training batch size", default=4 110 | ) 111 | args = parser.parse_args() 112 | main(args) 113 | --------------------------------------------------------------------------------
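As the README in this directory notes, `train.py` and `inference.py` can also be run locally inside the Dev Container without AML. Below is a minimal sketch of a local run using the argparse defaults defined in the two scripts; the working directory follows the `/workspace/src/sample_pytorch_gpu_project` path used elsewhere in the README:

```bash
# local run inside the Dev Container; artifacts go to ./outputs and MLflow logs to ./mlruns
cd /workspace/src/sample_pytorch_gpu_project
python train.py --batch_size 4   # trains on CIFAR10 and saves outputs/cifar_net.pth
python inference.py              # loads outputs/cifar_net.pth and writes outputs/preds.csv
```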