├── .gitignore ├── CODE_OF_CONDUCT.md ├── CaseStudy.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── config └── model_config.json ├── devops └── pipeline │ ├── build_validation_pipeline.yml │ ├── london_taxi_ci_dev_pipeline.yml │ ├── london_taxi_pr_dev_pipeline.yml │ ├── nyc_taxi_ci_dev_pipeline.yml │ ├── nyc_taxi_pr_dev_pipeline.yml │ ├── platform_ci_dev_pipeline.yml │ ├── platform_pr_dev_pipeline.yml │ ├── requirements │ ├── build_validation_requirements.txt │ └── execute_job_requirements.txt │ └── templates │ ├── configure_azureml_agent.yml │ ├── execute_mlops_pipeline.yml │ ├── experiment_variables.yml │ ├── get_connection_details.yml │ ├── variables_template.yml │ └── wait_with_extension_job.yml ├── docs ├── how_to_setup.md └── images │ ├── ConceptualDesign.png │ ├── DataCollection_Design.png │ ├── MLModelFactory_Design.png │ ├── Orchestrated_Inferencing.png │ ├── ProblemStatement_FishBone.png │ └── UseCaseBuilder_Design.png ├── image.png ├── mlops ├── __init__.py ├── common │ ├── __init__.py │ ├── get_compute.py │ ├── get_environment.py │ ├── get_workspace.py │ └── logger.py ├── london_taxi │ ├── components │ │ ├── predict.yml │ │ ├── prep.yml │ │ ├── register.yml │ │ ├── score.yml │ │ ├── train.yml │ │ └── transform.yml │ ├── data │ │ ├── greenTaxiData.csv │ │ └── yellowTaxiData.csv │ ├── environment │ │ └── conda.yml │ └── src │ │ ├── __init__.py │ │ └── mlops_pipeline.py └── nyc_taxi │ ├── components │ ├── predict.yml │ ├── prep.yml │ ├── register.yml │ ├── score.yml │ ├── train.yml │ └── transform.yml │ ├── data │ ├── greenTaxiData.csv │ └── yellowTaxiData.csv │ ├── environment │ └── conda.yml │ └── src │ ├── __init__.py │ └── mlops_pipeline.py ├── model ├── london_taxi │ ├── dockerfile │ ├── environment │ │ └── requirements.txt │ ├── pipeline-requirements.txt │ ├── sample-request.json │ └── scoring │ │ └── score.py └── nyc_taxi │ ├── dockerfile │ ├── environment │ └── requirements.txt │ ├── pipeline-requirements.txt │ ├── 
sample-request.json │ └── scoring │ └── score.py ├── notebooks └── execute_commands.ipynb ├── src ├── __init__.py ├── london_src │ ├── __init__.py │ ├── predict │ │ ├── __init__.py │ │ └── predict.py │ ├── prep │ │ ├── __init__.py │ │ └── prep.py │ ├── register │ │ ├── __init__.py │ │ └── register.py │ ├── score │ │ ├── __init__.py │ │ └── score.py │ ├── train │ │ ├── __init__.py │ │ └── train.py │ └── transform │ │ ├── __init__.py │ │ └── transform.py └── nyc_src │ ├── __init__.py │ ├── predict │ ├── __init__.py │ └── predict.py │ ├── prep │ ├── __init__.py │ └── prep.py │ ├── register │ ├── __init__.py │ └── register.py │ ├── score │ ├── __init__.py │ └── score.py │ ├── train │ ├── __init__.py │ └── train.py │ └── transform │ ├── __init__.py │ └── transform.py ├── telco_case_study_implementation └── fridge_object_detection │ ├── .gitignore │ ├── README.md │ ├── docs │ ├── 01-model-factory-design.md │ ├── 02-instructions.md │ └── assets │ │ └── images │ │ └── model_factory_design.jpg │ └── model_factory │ ├── __init__.py │ ├── common │ ├── __init__.py │ ├── devops │ │ └── templates │ │ │ ├── configure_azureml_agent.yml │ │ │ ├── execute_mlops_pipeline.yml │ │ │ ├── experiment_variables.yml │ │ │ ├── get_connection_details.yml │ │ │ ├── image_generation_template.yml │ │ │ ├── platform_dev_pipeline.yml │ │ │ ├── platform_main_pipeline.yml │ │ │ └── variables_template.yml │ ├── logging │ │ ├── __init__.py │ │ └── logger.py │ └── mlops │ │ ├── __init__.py │ │ ├── get_aml_client.py │ │ ├── get_compute.py │ │ ├── get_environment.py │ │ └── get_workspace.py │ └── fridge_obj_det │ ├── Makefile │ ├── __init__.py │ ├── config │ └── model_config.json │ ├── devops │ └── pipelines │ │ ├── build_validation_pipeline.yml │ │ ├── fridge_obj_det_dev_pipeline.yml │ │ ├── fridge_obj_det_main_pipeline.yml │ │ ├── fridge_obj_det_mlops_pipeline.yml │ │ └── requirements │ │ ├── build_validation_requirements.txt │ │ └── execute_job_requirements.txt │ ├── environment │ └── requirements.txt 
│ ├── mlops │ ├── .gitkeep │ ├── __init__.py │ ├── components │ │ ├── compare_map.yml │ │ ├── convert.yml │ │ ├── prep.yml │ │ ├── register.yml │ │ ├── score.yml │ │ └── train.yml │ ├── environment │ │ ├── conda.yml │ │ └── create_devenv.py │ └── src │ │ ├── __init__.py │ │ └── mlops_pipeline.py │ ├── model │ ├── .python-version │ ├── Dockerfile │ ├── __init__.py │ ├── model_artifacts │ │ ├── labels.json │ │ └── test_sample.jpg │ ├── package.json │ ├── poetry.lock │ ├── pyproject.toml │ ├── scoring │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── prepare.py │ │ └── score.py │ └── server │ │ └── __main__.py │ ├── src │ ├── __init__.py │ ├── compare_map │ │ ├── __init__.py │ │ ├── compare_map.py │ │ └── compare_pipeline.py │ ├── convert │ │ ├── convert_fp32_to_fp16.py │ │ └── convert_pipeline.py │ ├── prep │ │ ├── __init__.py │ │ ├── prep.py │ │ └── voc_jsonl_converter.py │ ├── register │ │ ├── __init__.py │ │ └── register.py │ ├── score │ │ ├── __init__.py │ │ └── score.py │ └── train │ │ ├── __init__.py │ │ └── train.py │ └── test │ ├── __init__.py │ └── test_compare_map.py └── test ├── __init__.py ├── london_taxi ├── __init__.py └── test_to_delete.py ├── nyc_taxi ├── __init__.py └── test_to_delete.py └── test_to_delete.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLOps Model Factory Accelerator 2 | 3 | > **Note:** 4 | > This is a repo that can be shared to our customers. 
This means it's NOT OK to include Microsoft confidential 5 | > content. All discussions should be appropriate for a public audience. 6 | 7 | MLOps Model Factory is a platform and an end-to-end workflow that supports generating multiple models that can be deployed to any target. 8 | 9 | ## Features 10 | 11 | - Supports generation of multiple ML Models through a single platform and repo 12 | - MLOps pipeline for Data preparation, transformation, Model Training, evaluation, scoring and registration 13 | - Based on Azure ML SDK v2 1.4 14 | - Option to package ML Models in Docker Images 15 | 16 | 17 | 18 | ## About this repo 19 | 20 | The idea of this platform and end-to-end workflow is to provide a minimum number of scripts to implement an environment to train and test multiple ML Models using Azure ML SDK v2 and Azure DevOps. 21 | 22 | The workflow contains the following folders/files: 23 | 24 | - devops: the folder contains Azure DevOps related files (yaml files to define Builds). 25 | - docs: documentation. 26 | - src: source code that is not related to Azure ML directly. This is typically data science related code. 27 | - mlops: scripts that are related to Azure ML. 28 | - mlops/nyc_taxi: a fake pipeline with some basic code to build a model 29 | - mlops/london_taxi: a fake pipeline with some basic code to build another model 30 | - test: a folder with a dummy test to write unit tests for the build 31 | - model: Model related files and dependencies 32 | 33 | - .amlignore: using this file we are removing all the folders and files that are not supposed to be in Azure ML compute. 34 | 35 | The workflow contains the following documents: 36 | 37 | - docs/how_to_setup.md: explains how to configure the workflow. 38 | 39 | ## How to use the repo 40 | 41 | Information about how to set up the repo is in [the following document](./docs/how_to_setup.md). 
42 | 43 | ## Local experimentation 44 | 45 | Developers and Data scientists can use the [execute-command](./notebooks/execute_commands.ipynb) in the `notebooks` to try out the commands in the AML compute from their local machine. 46 | 47 | ## Reference 48 | 49 | * [Azure Machine learning](https://docs.microsoft.com/azure/machine-learning) 50 | * [Azure DevOps pipelines](https://learn.microsoft.com/en-gb/azure/devops/pipelines/) 51 | * [Azure Machine learning SDK V2](https://learn.microsoft.com/en-gb/python/api/overview/azure/ai-ml-readme?view=azure-python) 52 | * [Azure AD Service Principal](https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal) 53 | * [Azure Key Vault](https://learn.microsoft.com/en-gb/azure/key-vault/general/) -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 
14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 
26 | -------------------------------------------------------------------------------- /config/model_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "models":[ 3 | { 4 | "ML_MODEL_CONFIG_NAME": "london_taxi", 5 | "ENV_NAME": "dev", 6 | "CLUSTER_NAME": "new-cluster", 7 | "CLUSTER_REGION": "eastus", 8 | "CLUSTER_SIZE": "STANDARD_DS3_v2", 9 | "CONDA_PATH": "mlops/london_taxi/environment/conda.yml", 10 | "DISPLAY_BASE_NAME": "mlops", 11 | "ENV_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04", 12 | "ENVIRONMENT_NAME": "sklearn-python3", 13 | "EXPERIMENT_BASE_NAME": "londontaxi", 14 | "KEYVAULT_NAME": "researchmlops5963078644", 15 | "MODEL_BASE_NAME": "regr", 16 | "RESOURCE_GROUP_NAME": "mlops", 17 | "WORKSPACE_NAME": "researchmlops" 18 | }, 19 | { 20 | "ML_MODEL_CONFIG_NAME": "nyc_taxi", 21 | "ENV_NAME": "dev", 22 | "CLUSTER_NAME": "new-cluster", 23 | "CLUSTER_REGION": "eastus", 24 | "CLUSTER_SIZE": "STANDARD_DS3_v2", 25 | "CONDA_PATH": "mlops/nyc_taxi/environment/conda.yml", 26 | "DISPLAY_BASE_NAME": "mlops", 27 | "ENV_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04", 28 | "ENVIRONMENT_NAME": "sklearn-python3", 29 | "EXPERIMENT_BASE_NAME": "nyctaxi", 30 | "KEYVAULT_NAME": "researchmlops5963078644", 31 | "MODEL_BASE_NAME": "cls", 32 | "RESOURCE_GROUP_NAME": "mlops", 33 | "WORKSPACE_NAME": "researchmlops" 34 | } 35 | ] 36 | } -------------------------------------------------------------------------------- /devops/pipeline/build_validation_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: model_type 3 | displayName: "type of model to execute" 4 | 5 | jobs: 6 | - job: Build_Validation_Pipeline 7 | steps: 8 | - task: UsePythonVersion@0 9 | displayName: 'Use Python 3.8' 10 | inputs: 11 | versionSpec: '3.8' 12 | 13 | - script: | 14 | python -m pip install --upgrade pip 15 | pip install -r 
devops/pipeline/requirements/build_validation_requirements.txt 16 | displayName: "Load Python Dependencies" 17 | 18 | 19 | - script: | 20 | pytest test/${{ parameters.model_type }} --ignore=sandbox/ --junitxml=junit/test-results.xml --cov=. --cov-report=xml 21 | displayName: 'Run Unit Tests' 22 | condition: succeededOrFailed() 23 | 24 | - task: PublishTestResults@2 25 | condition: succeededOrFailed() 26 | inputs: 27 | testResultsFiles: '**/test-*.xml' 28 | testRunTitle: 'Publish Test Results for Python $(python.version)' 29 | 30 | - task: PublishCodeCoverageResults@1 31 | inputs: 32 | codeCoverageTool: Cobertura 33 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' -------------------------------------------------------------------------------- /devops/pipeline/london_taxi_ci_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | pr: none 2 | trigger: 3 | branches: 4 | include: 5 | - master 6 | paths: 7 | include: 8 | - devops/* 9 | - mlops/common/* 10 | - mlops/london_taxi/* 11 | - src/london_src/* 12 | - model/london_taxi/* 13 | - src/shared/* 14 | 15 | pool: 16 | vmImage: ubuntu-latest 17 | 18 | 19 | variables: 20 | - group: mlops_platform_dev_vg 21 | - name: PIPELINE_TYPE 22 | value: london_taxi 23 | 24 | parameters: 25 | - name: env_name 26 | displayName: "Execution Environment" 27 | default: "dev" 28 | - name: model_type 29 | displayName: "type of model to execute" 30 | default: "london_taxi" 31 | 32 | stages: 33 | - template: templates/variables_template.yml 34 | parameters: 35 | env_name: ${{parameters.env_name}} 36 | model_type: ${{parameters.model_type}} 37 | - template: platform_ci_dev_pipeline.yml 38 | parameters: 39 | exec_environment: ${{ parameters.env_name }} 40 | model_type: ${{ parameters.model_type }} -------------------------------------------------------------------------------- /devops/pipeline/london_taxi_pr_dev_pipeline.yml: 
-------------------------------------------------------------------------------- 1 | trigger: none 2 | pr: 3 | branches: 4 | include: 5 | - development 6 | paths: 7 | include: 8 | - devops/* 9 | - mlops/common/* 10 | - mlops/london_taxi/* 11 | - src/london_src/* 12 | - model/london_taxi/* 13 | - src/shared/* 14 | 15 | pool: 16 | vmImage: ubuntu-latest 17 | 18 | 19 | variables: 20 | - group: mlops_platform_dev_vg 21 | - name: PIPELINE_TYPE 22 | value: london_taxi 23 | 24 | parameters: 25 | - name: env_name 26 | displayName: "Execution Environment" 27 | default: "dev" 28 | - name: model_type 29 | displayName: "type of model to execute" 30 | default: "london_taxi" 31 | 32 | stages: 33 | - template: templates/variables_template.yml 34 | parameters: 35 | env_name: ${{parameters.env_name}} 36 | model_type: ${{parameters.model_type}} 37 | - template: platform_pr_dev_pipeline.yml 38 | parameters: 39 | exec_environment: ${{ parameters.env_name }} 40 | model_type: ${{ parameters.model_type }} -------------------------------------------------------------------------------- /devops/pipeline/nyc_taxi_ci_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | pr: none 2 | trigger: 3 | branches: 4 | include: 5 | - master 6 | paths: 7 | include: 8 | - devops/* 9 | - mlops/common/* 10 | - mlops/nyc_taxi/* 11 | - src/nyc_src/* 12 | - model/nyc_taxi/* 13 | - src/shared/* 14 | 15 | pool: 16 | vmImage: ubuntu-latest 17 | 18 | 19 | variables: 20 | - group: mlops_platform_dev_vg 21 | - name: PIPELINE_TYPE 22 | value: nyc_taxi 23 | 24 | parameters: 25 | - name: env_name 26 | displayName: "Execution Environment" 27 | default: "dev" 28 | - name: model_type 29 | displayName: "type of model to execute" 30 | default: "nyc_taxi" 31 | 32 | stages: 33 | - template: templates/variables_template.yml 34 | parameters: 35 | env_name: ${{parameters.env_name}} 36 | model_type: ${{parameters.model_type}} 37 | - template: platform_ci_dev_pipeline.yml 38 | 
parameters: 39 | exec_environment: ${{ parameters.env_name }} 40 | model_type: ${{ parameters.model_type }} -------------------------------------------------------------------------------- /devops/pipeline/nyc_taxi_pr_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | trigger: none 2 | pr: 3 | branches: 4 | include: 5 | - development 6 | paths: 7 | include: 8 | - devops/* 9 | - mlops/common/* 10 | - mlops/nyc_taxi/* 11 | - src/nyc_src/* 12 | - model/nyc_taxi/* 13 | - src/shared/* 14 | 15 | pool: 16 | vmImage: ubuntu-latest 17 | 18 | 19 | variables: 20 | - group: mlops_platform_dev_vg 21 | - name: PIPELINE_TYPE 22 | value: nyc_taxi 23 | 24 | parameters: 25 | - name: env_name 26 | displayName: "Execution Environment" 27 | default: "dev" 28 | - name: model_type 29 | displayName: "type of model to execute" 30 | default: "nyc_taxi" 31 | 32 | stages: 33 | - template: templates/variables_template.yml 34 | parameters: 35 | env_name: ${{parameters.env_name}} 36 | model_type: ${{parameters.model_type}} 37 | - template: platform_pr_dev_pipeline.yml 38 | parameters: 39 | exec_environment: ${{ parameters.env_name }} 40 | model_type: ${{ parameters.model_type }} -------------------------------------------------------------------------------- /devops/pipeline/platform_ci_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | 2 | parameters: 3 | - name: exec_environment 4 | displayName: "Execution Environment" 5 | default: "dev" 6 | - name: model_type 7 | displayName: "type of model to execute" 8 | 9 | 10 | stages: 11 | - stage: execute_training_job 12 | displayName: execute_training_job 13 | dependsOn: 14 | - variable_generation 15 | variables: 16 | - template: templates/experiment_variables.yml 17 | jobs: 18 | - job: Execute_ml_Job_Pipeline 19 | steps: 20 | - template: templates/get_connection_details.yml 21 | - template: templates/configure_azureml_agent.yml 22 | - template: 
templates/execute_mlops_pipeline.yml 23 | parameters: 24 | script_parameter: | 25 | python -m mlops.${{ parameters.model_type }}.src.mlops_pipeline \ 26 | --subscription_id $(SUBSCRIPTION_ID) \ 27 | --resource_group_name $(RESOURCE_GROUP_NAME) \ 28 | --workspace_name $(WORKSPACE_NAME) \ 29 | --cluster_name $(CLUSTER_NAME) \ 30 | --cluster_size $(CLUSTER_SIZE) \ 31 | --cluster_region $(CLUSTER_REGION) \ 32 | --build_reference $(BUILD.BUILDID) \ 33 | --deploy_environment ${{parameters.exec_environment}} \ 34 | --experiment_name $(EXPERIMENT_NAME) \ 35 | --display_name $(DISPLAY_NAME) \ 36 | --wait_for_completion True \ 37 | --environment_name $(ENVIRONMENT_NAME) \ 38 | --env_base_image_name $(ENV_BASE_IMAGE_NAME) \ 39 | --model_name $(MODEL_NAME) \ 40 | --conda_path $(CONDA_PATH) \ 41 | --output_file run_id.txt 42 | 43 | -------------------------------------------------------------------------------- /devops/pipeline/platform_pr_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | 2 | parameters: 3 | - name: exec_environment 4 | displayName: "Execution Environment" 5 | default: "dev" 6 | - name: model_type 7 | displayName: "type of model to execute" 8 | 9 | 10 | stages: 11 | - stage: build_validation 12 | displayName: build_validation 13 | dependsOn: 14 | - variable_generation 15 | variables: 16 | - template: templates/experiment_variables.yml 17 | jobs: 18 | - template: build_validation_pipeline.yml 19 | parameters: 20 | model_type: ${{ parameters.model_type }} 21 | - stage: execute_training_job 22 | displayName: execute_training_job 23 | dependsOn: 24 | - variable_generation 25 | - build_validation 26 | variables: 27 | - template: templates/experiment_variables.yml 28 | jobs: 29 | - job: Execute_ml_Job_Pipeline 30 | steps: 31 | - template: templates/get_connection_details.yml 32 | - template: templates/configure_azureml_agent.yml 33 | - template: templates/execute_mlops_pipeline.yml 34 | parameters: 35 | 
script_parameter: | 36 | python -m mlops.${{ parameters.model_type }}.src.mlops_pipeline \ 37 | --subscription_id $(SUBSCRIPTION_ID) \ 38 | --resource_group_name $(RESOURCE_GROUP_NAME) \ 39 | --workspace_name $(WORKSPACE_NAME) \ 40 | --cluster_name $(CLUSTER_NAME) \ 41 | --cluster_size $(CLUSTER_SIZE) \ 42 | --cluster_region $(CLUSTER_REGION) \ 43 | --build_reference $(BUILD.BUILDID) \ 44 | --deploy_environment ${{parameters.exec_environment}} \ 45 | --experiment_name $(EXPERIMENT_NAME) \ 46 | --display_name $(DISPLAY_NAME) \ 47 | --wait_for_completion True \ 48 | --environment_name $(ENVIRONMENT_NAME) \ 49 | --env_base_image_name $(ENV_BASE_IMAGE_NAME) \ 50 | --model_name $(MODEL_NAME) \ 51 | --conda_path $(CONDA_PATH) 52 | -------------------------------------------------------------------------------- /devops/pipeline/requirements/build_validation_requirements.txt: -------------------------------------------------------------------------------- 1 | flake8-docstrings==1.6.0 2 | flake8==4.0.1 3 | pep8-naming==0.13.0 4 | pytest-cov==3.0.0 5 | pytest-azurepipelines==1.0.3 6 | pytest-mock==3.7.0 7 | pytest==7.1.2 8 | mlflow==1.27.0 9 | azure-ai-ml==1.5.0 10 | azure-identity==1.11.0 11 | mldesigner==0.1.0b4 12 | -------------------------------------------------------------------------------- /devops/pipeline/requirements/execute_job_requirements.txt: -------------------------------------------------------------------------------- 1 | azure-cli==2.53.0 2 | azure-ai-ml==1.5.0 3 | azure-identity==1.11.0 4 | flake8-docstrings==1.6.0 5 | flake8==4.0.1 6 | pep8-naming==0.13.0 7 | pytest-cov==3.0.0 8 | pytest-azurepipelines==1.0.3 9 | pytest-mock==3.7.0 10 | pytest==7.1.2 11 | mlflow==2.7.1 12 | mldesigner==0.1.0b4 -------------------------------------------------------------------------------- /devops/pipeline/templates/configure_azureml_agent.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - task: UsePythonVersion@0 3 | 
displayName: 'Use Python 3.8' 4 | inputs: 5 | versionSpec: '3.8' 6 | 7 | - task: AzureCLI@2 8 | displayName: Install Job Requirements 9 | inputs: 10 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 11 | scriptType: bash 12 | scriptLocation: inlineScript 13 | workingDirectory: $(System.DefaultWorkingDirectory) 14 | inlineScript: | 15 | set -e # fail on error 16 | python -m pip install --upgrade pip 17 | pip install -r devops/pipeline/requirements/execute_job_requirements.txt 18 | az version 19 | 20 | 21 | -------------------------------------------------------------------------------- /devops/pipeline/templates/execute_mlops_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: script_parameter 3 | type: string 4 | 5 | steps: 6 | - task: AzureCLI@2 7 | name: submit_aml_job_task 8 | displayName: Execute Azure ML pipeline job 9 | continueOnError: false 10 | env: {APPLICATIONINSIGHTS_CONNECTION_STRING: "$(APPLICATIONINSIGHTS-CONNECTION-STRING)"} 11 | inputs: 12 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 13 | scriptType: bash 14 | workingDirectory: $(System.DefaultWorkingDirectory) 15 | scriptLocation: inlineScript 16 | inlineScript: | 17 | ${{parameters.script_parameter}} 18 | -------------------------------------------------------------------------------- /devops/pipeline/templates/experiment_variables.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | - name: ML_MODEL_CONFIG_NAME 3 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ML_MODEL_CONFIG_NAME'] ] 4 | - name: KEYVAULT_NAME 5 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.KEYVAULT_NAME'] ] 6 | - name: EXPERIMENT_BASE_NAME 7 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.EXPERIMENT_BASE_NAME'] ] 8 | - name: ENVIRONMENT_NAME 9 | 
value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ENVIRONMENT_NAME'] ] 10 | - name: ENV_BASE_IMAGE_NAME 11 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ENV_BASE_IMAGE_NAME'] ] 12 | - name: DISPLAY_BASE_NAME 13 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.DISPLAY_BASE_NAME'] ] 14 | - name: CONDA_PATH 15 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CONDA_PATH'] ] 16 | - name: CLUSTER_SIZE 17 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_SIZE'] ] 18 | - name: CLUSTER_REGION 19 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_REGION'] ] 20 | - name: CLUSTER_NAME 21 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_NAME'] ] 22 | - name: AZURE_RM_SVC_CONNECTION 23 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.AZURE_RM_SVC_CONNECTION'] ] 24 | - name: MODEL_BASE_NAME 25 | value: $[ dependencies.variable_generation.load_config_variables.outputs['loading_model_config.MODEL_BASE_NAME'] ] 26 | - name: RESOURCE_GROUP_NAME 27 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.RESOURCE_GROUP_NAME'] ] 28 | - name: WORKSPACE_NAME 29 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.WORKSPACE_NAME'] ] 30 | - name: EXPERIMENT_NAME 31 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.EXPERIMENT_NAME'] ] 32 | - name: DISPLAY_NAME 33 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.DISPLAY_NAME'] ] 34 | - name: MODEL_NAME 35 | 
value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.MODEL_NAME'] ] -------------------------------------------------------------------------------- /devops/pipeline/templates/get_connection_details.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - task: AzureCLI@2 3 | name: retrieveAzureServiceConnection 4 | displayName: Retrieve Azure Service Connection 5 | inputs: 6 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 7 | scriptLocation: inlineScript 8 | scriptType: bash 9 | inlineScript: | 10 | export subscriptionId=$(az account show --query id -o tsv) 11 | echo "##vso[task.setvariable variable=SUBSCRIPTION_ID]$subscriptionId" 12 | echo "##vso[task.setvariable variable=TENANT_ID]$tenantId" 13 | addSpnToEnvironment: true -------------------------------------------------------------------------------- /devops/pipeline/templates/variables_template.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: env_name 3 | displayName: "Execution Environment" 4 | - name: model_type 5 | displayName: "type of model to execute" 6 | 7 | stages: 8 | - stage: variable_generation 9 | jobs: 10 | - job: load_config_variables 11 | steps: 12 | - powershell: | 13 | $json = Get-Content -Raw -Path '$(System.DefaultWorkingDirectory)/config/model_config.json' | ConvertFrom-Json 14 | $firstElement = $json.models | Where-Object {($_.ML_MODEL_CONFIG_NAME -eq "${{ parameters.model_type }}") -and ($_.ENV_NAME -eq "${{ parameters.env_name }}")} | Select-Object -First 1 15 | 16 | Write-Output $firstElement.KEYVAULT_NAME 17 | 18 | foreach ($property in $firstElement.PSObject.Properties) { 19 | $pname = $property.Name 20 | $pvalue = $property.Value 21 | Write-Output "##vso[task.setvariable variable=$pname;isoutput=true]$pvalue" 22 | } 23 | 24 | $EXPERIMENT_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.EXPERIMENT_BASE_NAME)" + "_" 
+ "${{parameters.env_name}}" + "_" + "$(Build.SourceBranchName)" 25 | Write-Output "##vso[task.setvariable variable=EXPERIMENT_NAME;isoutput=true]$EXPERIMENT_NAME" 26 | 27 | $DISPLAY_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.DISPLAY_BASE_NAME)" + "_" + "${{parameters.env_name}}" + "_" + "$(Build.BuildID)" 28 | Write-Output "##vso[task.setvariable variable=DISPLAY_NAME;isoutput=true]$DISPLAY_NAME" 29 | Write-Output $DISPLAY_NAME 30 | $MODEL_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.MODEL_BASE_NAME)" + "_" + "${{parameters.env_name}}" + "_" + "$(Build.SourceBranchName)" 31 | Write-Output "##vso[task.setvariable variable=MODEL_NAME;isoutput=true]$MODEL_NAME" 32 | name: loading_model_config 33 | 34 | - job: validate_assign_variables 35 | dependsOn: load_config_variables 36 | variables: 37 | - name: ML_MODEL_CONFIG_NAME 38 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ML_MODEL_CONFIG_NAME'] ] 39 | - name: KEYVAULT_NAME 40 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.KEYVAULT_NAME'] ] 41 | - name: EXPERIMENT_BASE_NAME 42 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.EXPERIMENT_BASE_NAME'] ] 43 | - name: ENVIRONMENT_NAME 44 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ENVIRONMENT_NAME'] ] 45 | - name: ENV_BASE_IMAGE_NAME 46 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ENV_BASE_IMAGE_NAME'] ] 47 | - name: DISPLAY_BASE_NAME 48 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.DISPLAY_BASE_NAME'] ] 49 | - name: CONDA_PATH 50 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CONDA_PATH'] ] 51 | - name: CLUSTER_SIZE 52 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_SIZE'] ] 53 | - name: CLUSTER_REGION 54 | value: $[ 
dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_REGION'] ] 55 | - name: CLUSTER_NAME 56 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_NAME'] ] 57 | - name: AZURE_RM_SVC_CONNECTION 58 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.AZURE_RM_SVC_CONNECTION'] ] 59 | - name: MODEL_BASE_NAME 60 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.MODEL_BASE_NAME'] ] 61 | - name: RESOURCE_GROUP_NAME 62 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.RESOURCE_GROUP_NAME'] ] 63 | - name: WORKSPACE_NAME 64 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.WORKSPACE_NAME'] ] 65 | - name: EXPERIMENT_NAME 66 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.EXPERIMENT_NAME'] ] 67 | - name: DISPLAY_NAME 68 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.DISPLAY_NAME'] ] 69 | - name: MODEL_NAME 70 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.MODEL_NAME'] ] 71 | steps: 72 | - script: | 73 | if [ -z "$(ML_MODEL_CONFIG_NAME)" ] 74 | then 75 | echo "variables are not available. Check parameter values or config json file for valid values.." 76 | exit 1 77 | else 78 | echo "variables were loaded from config file.." 
79 | printenv 80 | fi 81 | name: validate_variable_load 82 | - task: AzureKeyVault@2 83 | continueOnError: false 84 | inputs: 85 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 86 | KeyVaultName: $(KEYVAULT_NAME) 87 | SecretsFilter: '*' 88 | RunAsPreJob: false 89 | name: load_keyvault_secrets 90 | 91 | 92 | -------------------------------------------------------------------------------- /devops/pipeline/templates/wait_with_extension_job.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | - job: WaitForCallback 3 | pool: server 4 | timeoutInMinutes: 0 5 | dependsOn: Execute_Job_Pipeline 6 | variables: 7 | - name: run_name_from_submit_job 8 | value: $[ dependencies.Execute_Job_Pipeline.outputs['read_run_id.RUN_NAME'] ] 9 | 10 | steps: 11 | - task: AzureMLJobWaitTask@0 12 | inputs: 13 | serviceConnection: $(AZURE_RM_SVC_CONNECTION) 14 | resourceGroupName: $(RESOURCE_GROUP_NAME) 15 | azureMLWorkspaceName: $(WORKSPACE_NAME) 16 | azureMLWorkspaceLocation: $(CLUSTER_REGION) 17 | azureMLJobName: $(run_name_from_submit_job) 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/images/ConceptualDesign.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/ConceptualDesign.png -------------------------------------------------------------------------------- /docs/images/DataCollection_Design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/DataCollection_Design.png -------------------------------------------------------------------------------- /docs/images/MLModelFactory_Design.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/MLModelFactory_Design.png -------------------------------------------------------------------------------- /docs/images/Orchestrated_Inferencing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/Orchestrated_Inferencing.png -------------------------------------------------------------------------------- /docs/images/ProblemStatement_FishBone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/ProblemStatement_FishBone.png -------------------------------------------------------------------------------- /docs/images/UseCaseBuilder_Design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/UseCaseBuilder_Design.png -------------------------------------------------------------------------------- /image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/image.png -------------------------------------------------------------------------------- /mlops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/mlops/__init__.py -------------------------------------------------------------------------------- /mlops/common/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/mlops/common/__init__.py -------------------------------------------------------------------------------- /mlops/common/get_compute.py: -------------------------------------------------------------------------------- 1 | from azure.ai.ml import MLClient 2 | from azure.identity import DefaultAzureCredential 3 | import argparse 4 | from azure.ai.ml.entities import AmlCompute 5 | from mlops.common.logger import get_logger 6 | 7 | logger = get_logger() 8 | 9 | def get_compute( 10 | subscription_id: str, 11 | resource_group_name: str, 12 | workspace_name: str, 13 | cluster_name: str, 14 | cluster_size: str, 15 | cluster_region: str, 16 | min_instances: int, 17 | max_instances: int, 18 | idle_time_before_scale_down: int, 19 | ): 20 | compute_object = None 21 | try: 22 | client = MLClient( 23 | DefaultAzureCredential(), 24 | subscription_id=subscription_id, 25 | resource_group_name=resource_group_name, 26 | workspace_name=workspace_name, 27 | ) 28 | try: 29 | compute_object = client.compute.get(cluster_name) 30 | logger.info(f"Found existing compute target {cluster_name}, so using it.") 31 | except: 32 | logger.info(f"{cluster_name} is not found! Trying to create a new one.") 33 | compute_object = AmlCompute( 34 | name=cluster_name, 35 | type="amlcompute", 36 | size=cluster_size, 37 | location=cluster_region, 38 | min_instances=min_instances, 39 | max_instances=max_instances, 40 | idle_time_before_scale_down=idle_time_before_scale_down, 41 | ) 42 | compute_object = client.compute.begin_create_or_update( 43 | compute_object 44 | ).result() 45 | logger.info(f"A new cluster {cluster_name} has been created.") 46 | except Exception as ex: 47 | logger.exception("Oops! invalid credentials.. 
Try again...") 48 | raise 49 | return compute_object 50 | 51 | 52 | def main(): 53 | parser = argparse.ArgumentParser("get_compute") 54 | parser.add_argument("--subscription_id", type=str, help="Azure subscription id") 55 | parser.add_argument( 56 | "--resource_group_name", type=str, help="Azure Machine learning resource group" 57 | ) 58 | parser.add_argument( 59 | "--workspace_name", type=str, help="Azure Machine learning Workspace name" 60 | ) 61 | parser.add_argument( 62 | "--cluster_name", type=str, help="Azure Machine learning cluster name" 63 | ) 64 | parser.add_argument( 65 | "--cluster_size", type=str, help="Azure Machine learning cluster size" 66 | ) 67 | parser.add_argument( 68 | "--cluster_region", type=str, help="Azure Machine learning cluster region" 69 | ) 70 | parser.add_argument("--min_instances", type=int, default=0) 71 | parser.add_argument("--max_instances", type=int, default=4) 72 | parser.add_argument("--idle_time_before_scale_down", type=int, default=120) 73 | 74 | args = parser.parse_args() 75 | get_compute( 76 | args.subscription_id, 77 | args.resource_group_name, 78 | args.workspace_name, 79 | args.cluster_name, 80 | args.cluster_size, 81 | args.cluster_region, 82 | args.min_instances, 83 | args.max_instances, 84 | args.idle_time_before_scale_down, 85 | ) 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /mlops/common/get_environment.py: -------------------------------------------------------------------------------- 1 | from azure.ai.ml import MLClient 2 | from azure.identity import DefaultAzureCredential 3 | import argparse 4 | from azure.ai.ml.entities import Environment 5 | from mlops.common.logger import get_logger 6 | 7 | logger = get_logger() 8 | 9 | 10 | def get_environment( 11 | subscription_id: str, 12 | resource_group_name: str, 13 | workspace_name: str, 14 | env_base_image_name: str, 15 | conda_path: str, 16 | environment_name: str, 17 | 
description: str, 18 | ): 19 | try: 20 | logger.info(f"Checking {environment_name} environment.") 21 | client = MLClient( 22 | DefaultAzureCredential(), 23 | subscription_id=subscription_id, 24 | resource_group_name=resource_group_name, 25 | workspace_name=workspace_name, 26 | ) 27 | env_docker_conda = Environment( 28 | image=env_base_image_name, 29 | conda_file=conda_path, 30 | name=environment_name, 31 | description=description, 32 | ) 33 | environment = client.environments.create_or_update(env_docker_conda) 34 | logger.info(f"Environment {environment_name} has been created or updated.") 35 | return environment 36 | 37 | except Exception as ex: 38 | logger.exception( 39 | "Oops! invalid credentials or error while creating ML environment.. Try again..." 40 | ) 41 | raise 42 | 43 | 44 | def main(): 45 | parser = argparse.ArgumentParser("prepare_environment") 46 | parser.add_argument("--subscription_id", type=str, help="Azure subscription id") 47 | parser.add_argument( 48 | "--resource_group_name", type=str, help="Azure Machine learning resource group" 49 | ) 50 | parser.add_argument( 51 | "--workspace_name", type=str, help="Azure Machine learning Workspace name" 52 | ) 53 | parser.add_argument( 54 | "--env_base_image_name", type=str, help="Environment custom base image name" 55 | ) 56 | parser.add_argument( 57 | "--conda_path", type=str, help="path to conda requirements file" 58 | ) 59 | parser.add_argument( 60 | "--environment_name", type=str, help="Azure Machine learning environment name" 61 | ) 62 | parser.add_argument( 63 | "--description", type=str, default="Environment created using Conda." 
64 | ) 65 | args = parser.parse_args() 66 | 67 | get_environment( 68 | args.subscription_id, 69 | args.resource_group_name, 70 | args.workspace_name, 71 | args.env_base_image_name, 72 | args.conda_path, 73 | args.environment_name, 74 | args.description, 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /mlops/common/get_workspace.py: -------------------------------------------------------------------------------- 1 | from azure.ai.ml import MLClient 2 | from azure.identity import DefaultAzureCredential 3 | import argparse 4 | from mlops.common.logger import get_logger 5 | 6 | logger = get_logger() 7 | 8 | def get_workspace(subscription_id: str, resource_group_name: str, workspace_name: str): 9 | try: 10 | logger.info(f"Getting access to {workspace_name} workspace.") 11 | client = MLClient( 12 | DefaultAzureCredential(), 13 | subscription_id=subscription_id, 14 | resource_group_name=resource_group_name, 15 | workspace_name=workspace_name, 16 | ) 17 | 18 | workspace = client.workspaces.get(workspace_name) 19 | logger.info(f"Reference to {workspace_name} has been obtained.") 20 | return workspace 21 | except Exception as ex: 22 | logger.exception("Oops! invalid credentials.. 
Try again...") 23 | raise 24 | 25 | 26 | def main(): 27 | parser = argparse.ArgumentParser("get_workspace") 28 | parser.add_argument("--subscription_id", type=str, help="Azure subscription id") 29 | parser.add_argument( 30 | "--resource_group_name", type=str, help="Azure Machine learning resource group" 31 | ) 32 | parser.add_argument( 33 | "--workspace_name", type=str, help="Azure Machine learning Workspace name" 34 | ) 35 | 36 | args = parser.parse_args() 37 | get_workspace(args.subscription_id, args.resource_group_name, args.workspace_name) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /mlops/common/logger.py: -------------------------------------------------------------------------------- 1 | """Reusable logger for model_factory.""" 2 | import logging 3 | import sys 4 | 5 | 6 | def get_logger(name: str = "mlops", level: int = logging.INFO) -> logging.Logger: 7 | """Get logger. 8 | 9 | Args: 10 | name (str, optional): Logger name. Defaults to "mlops". 11 | level (int, optional): Log level. Defaults to logging.INFO. 12 | 13 | Returns: 14 | logging.Logger: named logger. 
15 | """ 16 | logger = logging.getLogger(name) 17 | if logger.hasHandlers(): 18 | return logger 19 | 20 | handler = logging.StreamHandler(sys.stdout) 21 | formatter = logging.Formatter( 22 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 23 | ) 24 | handler.setFormatter(formatter) 25 | 26 | logger.setLevel(level) 27 | logger.addHandler(handler) 28 | 29 | return logger 30 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/predict.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: predict_taxi_fares 3 | version: 1 4 | display_name: PredictTaxiFares 5 | type: command 6 | inputs: 7 | model_input: 8 | type: mlflow_model 9 | test_data: 10 | type: uri_folder 11 | outputs: 12 | predictions: 13 | type: uri_folder 14 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 15 | code: ./../../../ 16 | command: >- 17 | python -m src.london_src.predict.predict 18 | --model_input ${{inputs.model_input}} 19 | --test_data ${{inputs.test_data}} 20 | --predictions ${{outputs.predictions}} 21 | 22 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/prep.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: prepare_taxi_data 3 | display_name: PrepTaxiData 4 | version: 1 5 | type: command 6 | inputs: 7 | raw_data: 8 | type: uri_folder 9 | outputs: 10 | prep_data: 11 | type: uri_folder 12 | code: ./../../../ 13 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 14 | environment_variables: 15 | ritesh: modi 16 | command: >- 17 | python -m src.london_src.prep.prep 18 | --raw_data ${{inputs.raw_data}} 19 | --prep_data ${{outputs.prep_data}} 20 | 21 | 
-------------------------------------------------------------------------------- /mlops/london_taxi/components/register.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: register_taxi_model 3 | display_name: RegisterTaxiModel 4 | version: 1 5 | type: command 6 | inputs: 7 | model_metadata: 8 | type: uri_folder 9 | model_name: 10 | type: string 11 | score_report: 12 | type: uri_folder 13 | build_reference: 14 | type: string 15 | code: ./../../../ 16 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 17 | command: >- 18 | python -m src.london_src.register.register 19 | --model_metadata ${{inputs.model_metadata}} 20 | --model_name ${{inputs.model_name}} 21 | --score_report ${{inputs.score_report}} 22 | --build_reference ${{inputs.build_reference}} 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/score.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: score_model 3 | version: 1 4 | display_name: ScoreModel 5 | type: command 6 | inputs: 7 | predictions: 8 | type: uri_folder 9 | model: 10 | type: uri_folder 11 | outputs: 12 | score_report: 13 | type: uri_folder 14 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 15 | code: ./../../../ 16 | command: >- 17 | python -m src.london_src.score.score 18 | --predictions ${{inputs.predictions}} 19 | --model ${{inputs.model}} 20 | --score_report ${{outputs.score_report}} 21 | 22 | 23 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/train.yml: -------------------------------------------------------------------------------- 1 | $schema: 
https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: train_linear_regression_model 3 | display_name: TrainLinearRegressionModel 4 | version: 1 5 | type: command 6 | inputs: 7 | training_data: 8 | type: uri_folder 9 | outputs: 10 | model_output: 11 | type: uri_folder 12 | test_data: 13 | type: uri_folder 14 | model_metadata: 15 | type: uri_file 16 | code: ./../../../ 17 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 18 | command: >- 19 | python -m src.london_src.train.train 20 | --training_data ${{inputs.training_data}} 21 | --test_data ${{outputs.test_data}} 22 | --model_output ${{outputs.model_output}} 23 | --model_metadata ${{outputs.model_metadata}} 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/transform.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: taxi_feature_engineering 3 | display_name: TaxiFeatureEngineering 4 | version: 1 5 | type: command 6 | inputs: 7 | clean_data: 8 | type: uri_folder 9 | outputs: 10 | transformed_data: 11 | type: uri_folder 12 | code: ./../../../ 13 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 14 | command: >- 15 | python -m src.london_src.transform.transform 16 | --clean_data ${{inputs.clean_data}} 17 | --transformed_data ${{outputs.transformed_data}} 18 | 19 | -------------------------------------------------------------------------------- /mlops/london_taxi/environment/conda.yml: -------------------------------------------------------------------------------- 1 | name: prs-env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - pip 7 | - pip: 8 | - pandas 9 | - scikit-learn==1.2.0 10 | - mlflow>=2.7.1 11 | - azureml-mlflow>=1.51 12 | - mldesigner==0.1.0b4 13 | - azure-ai-ml==1.5.0 14 | - azure-identity==1.11.0 15 | - 
azure-keyvault-secrets==4.6.0 16 | -------------------------------------------------------------------------------- /mlops/london_taxi/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/mlops/london_taxi/src/__init__.py -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/predict.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: predict_taxi_fares 3 | version: 1 4 | display_name: PredictTaxiFares 5 | type: command 6 | inputs: 7 | model_input: 8 | type: mlflow_model 9 | test_data: 10 | type: uri_folder 11 | outputs: 12 | predictions: 13 | type: uri_folder 14 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 15 | code: ./../../../ 16 | command: >- 17 | python -m src.nyc_src.predict.predict 18 | --model_input ${{inputs.model_input}} 19 | --test_data ${{inputs.test_data}} 20 | --predictions ${{outputs.predictions}} 21 | 22 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/prep.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: prepare_taxi_data 3 | display_name: PrepTaxiData 4 | version: 1 5 | type: command 6 | inputs: 7 | raw_data: 8 | type: uri_folder 9 | outputs: 10 | prep_data: 11 | type: uri_folder 12 | code: ./../../../ 13 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 14 | environment_variables: 15 | ritesh: modi 16 | command: >- 17 | python -m src.nyc_src.prep.prep 18 | --raw_data ${{inputs.raw_data}} 19 | --prep_data ${{outputs.prep_data}} 20 | 21 | 
-------------------------------------------------------------------------------- /mlops/nyc_taxi/components/register.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: register_taxi_model 3 | display_name: RegisterTaxiModel 4 | version: 1 5 | type: command 6 | inputs: 7 | model_metadata: 8 | type: uri_folder 9 | model_name: 10 | type: string 11 | score_report: 12 | type: uri_folder 13 | build_reference: 14 | type: string 15 | code: ./../../../ 16 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 17 | command: >- 18 | python -m src.nyc_src.register.register 19 | --model_metadata ${{inputs.model_metadata}} 20 | --model_name ${{inputs.model_name}} 21 | --score_report ${{inputs.score_report}} 22 | --build_reference ${{inputs.build_reference}} 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/score.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: score_model 3 | version: 1 4 | display_name: ScoreModel 5 | type: command 6 | inputs: 7 | predictions: 8 | type: uri_folder 9 | model: 10 | type: uri_folder 11 | outputs: 12 | score_report: 13 | type: uri_folder 14 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 15 | code: ./../../../ 16 | command: >- 17 | python -m src.nyc_src.score.score 18 | --predictions ${{inputs.predictions}} 19 | --model ${{inputs.model}} 20 | --score_report ${{outputs.score_report}} 21 | 22 | 23 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/train.yml: -------------------------------------------------------------------------------- 1 | $schema: 
https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: train_linear_regression_model 3 | display_name: TrainLinearRegressionModel 4 | version: 1 5 | type: command 6 | inputs: 7 | training_data: 8 | type: uri_folder 9 | outputs: 10 | model_output: 11 | type: uri_folder 12 | test_data: 13 | type: uri_folder 14 | model_metadata: 15 | type: uri_file 16 | code: ./../../../ 17 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 18 | command: >- 19 | python -m src.nyc_src.train.train 20 | --training_data ${{inputs.training_data}} 21 | --test_data ${{outputs.test_data}} 22 | --model_output ${{outputs.model_output}} 23 | --model_metadata ${{outputs.model_metadata}} 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/transform.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: taxi_feature_engineering 3 | display_name: TaxiFeatureEngineering 4 | version: 1 5 | type: command 6 | inputs: 7 | clean_data: 8 | type: uri_folder 9 | outputs: 10 | transformed_data: 11 | type: uri_folder 12 | code: ./../../../ 13 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 14 | command: >- 15 | python -m src.nyc_src.transform.transform 16 | --clean_data ${{inputs.clean_data}} 17 | --transformed_data ${{outputs.transformed_data}} 18 | 19 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/environment/conda.yml: -------------------------------------------------------------------------------- 1 | name: prs-env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - pip 7 | - pip: 8 | - pandas 9 | - scikit-learn==1.2.0 10 | - mlflow>=2.7.1 11 | - azureml-mlflow>=1.51 12 | - mldesigner==0.1.0b4 13 | - azure-ai-ml==1.5.0 14 | - azure-identity==1.11.0 15 | - 
azure-keyvault-secrets==4.6.0 16 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/mlops/nyc_taxi/src/__init__.py -------------------------------------------------------------------------------- /model/london_taxi/dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cuda11.6.2-gpu-inference:latest 2 | 3 | ARG model_folder 4 | ARG model_registered_name 5 | ARG model_file_name 6 | ARG score_file 7 | 8 | ENV MODEL_FILE_NAME=$model_file_name 9 | ENV MODEL_LOG_PATH=/var/azureml-app/logs/ 10 | ENV MODEL_NAME=$model_registered_name 11 | 12 | COPY environment/requirements.txt ./requirements.txt 13 | 14 | RUN pip install -r ./requirements.txt 15 | 16 | RUN mkdir -p /var/azureml-app/azureml-models 17 | RUN mkdir -p /var/azureml-app/logs/ 18 | # score file 19 | COPY scoring/$score_file /var/azureml-app/$score_file 20 | ENV AZUREML_ENTRY_SCRIPT=$score_file 21 | 22 | # Model 23 | COPY $model_registered_name/$model_file_name /var/azureml-app/azureml-models/$model_file_name 24 | ENV AZUREML_MODEL_DIR=/var/azureml-app/azureml-models 25 | 26 | CMD ["runsvdir","/var/runit"] -------------------------------------------------------------------------------- /model/london_taxi/environment/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.2 2 | pip==21.2.4 3 | scikit-learn==0.24.2 4 | scipy==1.7.1 5 | azureml-defaults==1.38.0 6 | joblib==1.0.1 -------------------------------------------------------------------------------- /model/london_taxi/pipeline-requirements.txt: -------------------------------------------------------------------------------- 1 | azure-cli==2.41 
-------------------------------------------------------------------------------- /model/london_taxi/sample-request.json: -------------------------------------------------------------------------------- 1 | {"data": [ 2 | [0.9,40.73394012451172,-74.00725555419922,1,40.73118209838867,-74.00128173828125,0,1,6,1,10,11,50,31,6,1,10,11,55,51], 3 | [4.7,40.71599197387695,-73.99481964111328,1,40.71211624145508,-73.9439697265625,0,1,3,1,7,23,2,35,3,1,7,23,16,39], 4 | [1.22,40.69337844848633,-73.97087860107422,1,40.692501068115234,-73.98727416992188,0,2,5,1,30,21,4,7,5,1,30,21,9,9] 5 | ]} -------------------------------------------------------------------------------- /model/london_taxi/scoring/score.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy 4 | import joblib 5 | import csv 6 | import datetime 7 | from mlops.common.logger import get_logger 8 | 9 | logger = get_logger("london_taxi_score") 10 | 11 | def init(): 12 | """ 13 | This function is called when the container is initialized/started, typically after create/update of the deployment. 14 | You can write the logic here to perform init operations like caching the model in memory 15 | """ 16 | global model 17 | 18 | 19 | 20 | model_path = os.path.join( 21 | os.getenv("AZUREML_MODEL_DIR"), os.environ["MODEL_FILE_NAME"] 22 | ) 23 | # deserialize the model file back into a sklearn model 24 | model = joblib.load(model_path) 25 | logger.info("Init complete") 26 | 27 | 28 | def run(raw_data): 29 | """ 30 | This function is called for every invocation of the endpoint to perform the actual scoring/prediction. 
31 | In the example we extract the data from the json input and call the scikit-learn model's predict() 32 | method and return the result back 33 | """ 34 | current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 35 | folder_path = f"{os.environ['MODEL_LOG_PATH']}{os.environ['MODEL_NAME']}/{current_time}" 36 | if not os.path.exists(folder_path): 37 | os.makedirs(folder_path) 38 | csv_input_path = f"{folder_path}/input.csv" 39 | csv_output_path = f"{folder_path}/output.csv" 40 | logger.info("model 1: request received") 41 | data = json.loads(raw_data)["data"] 42 | data = numpy.array(data) 43 | numpy.savetxt(csv_input_path, data, delimiter=",") 44 | 45 | result = model.predict(data) 46 | 47 | numpy.savetxt(csv_output_path, result, delimiter=",") 48 | logger.info("Request processed") 49 | return result.tolist() 50 | -------------------------------------------------------------------------------- /model/nyc_taxi/dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cuda11.6.2-gpu-inference:latest 2 | 3 | ARG model_folder 4 | ARG model_registered_name 5 | ARG model_file_name 6 | ARG score_file 7 | 8 | ENV MODEL_FILE_NAME=$model_file_name 9 | ENV MODEL_LOG_PATH=/var/azureml-app/logs/ 10 | ENV MODEL_NAME=$model_registered_name 11 | 12 | COPY environment/requirements.txt ./requirements.txt 13 | 14 | RUN pip install -r ./requirements.txt 15 | 16 | RUN mkdir -p /var/azureml-app/azureml-models 17 | RUN mkdir -p /var/azureml-app/logs/ 18 | # score file 19 | COPY scoring/$score_file /var/azureml-app/$score_file 20 | ENV AZUREML_ENTRY_SCRIPT=$score_file 21 | 22 | # Model 23 | COPY $model_registered_name/$model_file_name /var/azureml-app/azureml-models/$model_file_name 24 | ENV AZUREML_MODEL_DIR=/var/azureml-app/azureml-models 25 | 26 | CMD ["runsvdir","/var/runit"] -------------------------------------------------------------------------------- 
/model/nyc_taxi/environment/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.2 2 | pip==21.2.4 3 | scikit-learn==0.24.2 4 | scipy==1.7.1 5 | azureml-defaults==1.38.0 6 | joblib==1.0.1 -------------------------------------------------------------------------------- /model/nyc_taxi/pipeline-requirements.txt: -------------------------------------------------------------------------------- 1 | azure-cli==2.41 -------------------------------------------------------------------------------- /model/nyc_taxi/sample-request.json: -------------------------------------------------------------------------------- 1 | {"data": [ 2 | [0.9,40.73394012451172,-74.00725555419922,1,40.73118209838867,-74.00128173828125,0,1,6,1,10,11,50,31,6,1,10,11,55,51], 3 | [4.7,40.71599197387695,-73.99481964111328,1,40.71211624145508,-73.9439697265625,0,1,3,1,7,23,2,35,3,1,7,23,16,39], 4 | [1.22,40.69337844848633,-73.97087860107422,1,40.692501068115234,-73.98727416992188,0,2,5,1,30,21,4,7,5,1,30,21,9,9] 5 | ]} -------------------------------------------------------------------------------- /model/nyc_taxi/scoring/score.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | import numpy 5 | import joblib 6 | import csv 7 | import datetime 8 | from mlops.common.logger import get_logger 9 | 10 | logger = get_logger("nyc_taxi_score") 11 | 12 | def init(): 13 | """ 14 | This function is called when the container is initialized/started, typically after create/update of the deployment. 
15 | You can write the logic here to perform init operations like caching the model in memory 16 | """ 17 | global model 18 | 19 | 20 | 21 | model_path = os.path.join( 22 | os.getenv("AZUREML_MODEL_DIR"), os.environ["MODEL_FILE_NAME"] 23 | ) 24 | # deserialize the model file back into a sklearn model 25 | model = joblib.load(model_path) 26 | logger.info("Init complete") 27 | 28 | 29 | def run(raw_data): 30 | """ 31 | This function is called for every invocation of the endpoint to perform the actual scoring/prediction. 32 | In the example we extract the data from the json input and call the scikit-learn model's predict() 33 | method and return the result back 34 | """ 35 | current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 36 | folder_path = f"{os.environ['MODEL_LOG_PATH']}{os.environ['MODEL_NAME']}/{current_time}" 37 | if not os.path.exists(folder_path): 38 | os.makedirs(folder_path) 39 | csv_input_path = f"{folder_path}/input.csv" 40 | csv_output_path = f"{folder_path}/output.csv" 41 | logger.info("model 1: request received") 42 | data = json.loads(raw_data)["data"] 43 | data = numpy.array(data) 44 | numpy.savetxt(csv_input_path, data, delimiter=",") 45 | 46 | result = model.predict(data) 47 | 48 | numpy.savetxt(csv_output_path, result, delimiter=",") 49 | logger.info("Request processed") 50 | return result.tolist() 51 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/__init__.py -------------------------------------------------------------------------------- /src/london_src/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/__init__.py -------------------------------------------------------------------------------- /src/london_src/predict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/predict/__init__.py -------------------------------------------------------------------------------- /src/london_src/predict/predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import os 4 | from pathlib import Path 5 | from sklearn.linear_model import LinearRegression 6 | import pickle 7 | from mlops.common.logger import get_logger 8 | 9 | logger = get_logger("london_taxi_predict") 10 | 11 | def main(model_input, test_data, prediction_path): 12 | lines = [ 13 | f"Model path: {model_input}", 14 | f"Test data path: {test_data}", 15 | f"Predictions path: {prediction_path}", 16 | ] 17 | 18 | for line in lines: 19 | logger.info(line) 20 | 21 | testX, testy = load_test_data(test_data) 22 | predict(testX, testy, model_input, prediction_path) 23 | 24 | 25 | # Load and split the test data 26 | def load_test_data(test_data): 27 | logger.info("mounted_path files: ") 28 | arr = os.listdir(test_data) 29 | 30 | logger.info(arr) 31 | df_list = [] 32 | for filename in arr: 33 | logger.info("reading file: %s ..." 
% filename) 34 | with open(os.path.join(test_data, filename), "r") as handle: 35 | input_df = pd.read_csv((Path(test_data) / filename)) 36 | df_list.append(input_df) 37 | 38 | test_data = df_list[0] 39 | testy = test_data["cost"] 40 | testX = test_data[ 41 | [ 42 | "distance", 43 | "dropoff_latitude", 44 | "dropoff_longitude", 45 | "passengers", 46 | "pickup_latitude", 47 | "pickup_longitude", 48 | "store_forward", 49 | "vendor", 50 | "pickup_weekday", 51 | "pickup_month", 52 | "pickup_monthday", 53 | "pickup_hour", 54 | "pickup_minute", 55 | "pickup_second", 56 | "dropoff_weekday", 57 | "dropoff_month", 58 | "dropoff_monthday", 59 | "dropoff_hour", 60 | "dropoff_minute", 61 | "dropoff_second", 62 | ] 63 | ] 64 | logger.info(testX.shape) 65 | logger.info(testX.columns) 66 | return testX, testy 67 | 68 | 69 | def predict(testX, testy, model_input, prediction_path): 70 | # Load the model from input port 71 | model = pickle.load(open((Path(model_input) / "model.sav"), "rb")) 72 | 73 | # Make predictions on testX data and record them in a column named predicted_cost 74 | predictions = model.predict(testX) 75 | testX["predicted_cost"] = predictions 76 | logger.info(testX.shape) 77 | 78 | # Compare predictions to actuals (testy) 79 | output_data = pd.DataFrame(testX) 80 | output_data["actual_cost"] = testy 81 | 82 | # Save the output data with feature columns, predicted cost, and actual cost in csv file 83 | output_data = output_data.to_csv((Path(prediction_path) / "predictions.csv")) 84 | 85 | 86 | if __name__ == "__main__": 87 | parser = argparse.ArgumentParser("predict") 88 | parser.add_argument("--model_input", type=str, help="Path of input model") 89 | parser.add_argument("--test_data", type=str, help="Path to test data") 90 | parser.add_argument("--predictions", type=str, help="Path of predictions") 91 | 92 | args = parser.parse_args() 93 | 94 | logger.info("hello scoring world...") 95 | 96 | model_input = args.model_input 97 | test_data = args.test_data 98 | 
prediction_path = args.predictions 99 | main(model_input, test_data, prediction_path) 100 | -------------------------------------------------------------------------------- /src/london_src/prep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/prep/__init__.py -------------------------------------------------------------------------------- /src/london_src/prep/prep.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from typing_extensions import Concatenate 4 | from uuid import uuid4 5 | from datetime import datetime 6 | import os 7 | import pandas as pd 8 | from sklearn.linear_model import LinearRegression 9 | from sklearn.model_selection import train_test_split 10 | import pickle 11 | from mlops.common.logger import get_logger 12 | 13 | logger = get_logger("london_taxi_prep") 14 | 15 | def main(raw_data, prep_data): 16 | logger.info("hello training world...") 17 | 18 | lines = [ 19 | f"Raw data path: {raw_data}", 20 | f"Data output path: {prep_data}", 21 | ] 22 | 23 | for line in lines: 24 | logger.info(line) 25 | 26 | logger.info("mounted_path files: ") 27 | arr = os.listdir(raw_data) 28 | logger.info(arr) 29 | 30 | df_list = [] 31 | for filename in arr: 32 | logger.info("reading file: %s ..." 
% filename) 33 | with open(os.path.join(raw_data, filename), "r") as handle: 34 | input_df = pd.read_csv((Path(raw_data) / filename)) 35 | df_list.append(input_df) 36 | 37 | # Prep the green and yellow taxi data 38 | green_data = df_list[0] 39 | yellow_data = df_list[1] 40 | 41 | data_prep(green_data, yellow_data) 42 | 43 | 44 | def data_prep(green_data, yellow_data): 45 | # Define useful columns needed for the Azure Machine Learning NYC Taxi tutorial 46 | useful_columns = str( 47 | [ 48 | "cost", 49 | "distance", 50 | "dropoff_datetime", 51 | "dropoff_latitude", 52 | "dropoff_longitude", 53 | "passengers", 54 | "pickup_datetime", 55 | "pickup_latitude", 56 | "pickup_longitude", 57 | "store_forward", 58 | "vendor", 59 | ] 60 | ).replace(",", ";") 61 | logger.info(useful_columns) 62 | 63 | # Rename columns as per Azure Machine Learning NYC Taxi tutorial 64 | green_columns = str( 65 | { 66 | "vendorID": "vendor", 67 | "lpepPickupDatetime": "pickup_datetime", 68 | "lpepDropoffDatetime": "dropoff_datetime", 69 | "storeAndFwdFlag": "store_forward", 70 | "pickupLongitude": "pickup_longitude", 71 | "pickupLatitude": "pickup_latitude", 72 | "dropoffLongitude": "dropoff_longitude", 73 | "dropoffLatitude": "dropoff_latitude", 74 | "passengerCount": "passengers", 75 | "fareAmount": "cost", 76 | "tripDistance": "distance", 77 | } 78 | ).replace(",", ";") 79 | 80 | yellow_columns = str( 81 | { 82 | "vendorID": "vendor", 83 | "tpepPickupDateTime": "pickup_datetime", 84 | "tpepDropoffDateTime": "dropoff_datetime", 85 | "storeAndFwdFlag": "store_forward", 86 | "startLon": "pickup_longitude", 87 | "startLat": "pickup_latitude", 88 | "endLon": "dropoff_longitude", 89 | "endLat": "dropoff_latitude", 90 | "passengerCount": "passengers", 91 | "fareAmount": "cost", 92 | "tripDistance": "distance", 93 | } 94 | ).replace(",", ";") 95 | 96 | logger.info("green_columns: " + green_columns) 97 | logger.info("yellow_columns: " + yellow_columns) 98 | 99 | green_data_clean = 
cleanseData(green_data, green_columns, useful_columns) 100 | yellow_data_clean = cleanseData(yellow_data, yellow_columns, useful_columns) 101 | 102 | # Append yellow data to green data 103 | combined_df = pd.concat([green_data_clean,yellow_data_clean], ignore_index=True) 104 | combined_df.reset_index(inplace=True, drop=True) 105 | 106 | output_green = green_data_clean.to_csv( 107 | os.path.join(prep_data, "green_prep_data.csv") 108 | ) 109 | output_yellow = yellow_data_clean.to_csv( 110 | os.path.join(prep_data, "yellow_prep_data.csv") 111 | ) 112 | merged_data = combined_df.to_csv(os.path.join(prep_data, "merged_data.csv")) 113 | 114 | logger.info("Finish") 115 | 116 | 117 | # These functions ensure that null data is removed from the dataset, 118 | # which will help increase machine learning model accuracy. 119 | def get_dict(dict_str): 120 | pairs = dict_str.strip("{}").split(";") 121 | new_dict = {} 122 | for pair in pairs: 123 | logger.info(pair) 124 | key, value = pair.strip().split(":") 125 | new_dict[key.strip().strip("'")] = value.strip().strip("'") 126 | return new_dict 127 | 128 | 129 | def cleanseData(data, columns, useful_columns): 130 | useful_columns = [ 131 | s.strip().strip("'") for s in useful_columns.strip("[]").split(";") 132 | ] 133 | new_columns = get_dict(columns) 134 | 135 | new_df = (data.dropna(how="all").rename(columns=new_columns))[useful_columns] 136 | 137 | new_df.reset_index(inplace=True, drop=True) 138 | return new_df 139 | 140 | 141 | if __name__ == "__main__": 142 | parser = argparse.ArgumentParser() 143 | parser.add_argument( 144 | "--raw_data", 145 | type=str, 146 | default="../data/raw_data", 147 | help="Path to raw data", 148 | ) 149 | parser.add_argument( 150 | "--prep_data", type=str, default="../data/prep_data", help="Path to prep data" 151 | ) 152 | 153 | args = parser.parse_args() 154 | raw_data = args.raw_data 155 | prep_data = args.prep_data 156 | 157 | main(raw_data, prep_data) 158 | 
-------------------------------------------------------------------------------- /src/london_src/register/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/register/__init__.py -------------------------------------------------------------------------------- /src/london_src/register/register.py: -------------------------------------------------------------------------------- 1 | import mlflow 2 | import argparse 3 | import os 4 | import json 5 | from pathlib import Path 6 | from mlops.common.logger import get_logger 7 | 8 | logger = get_logger("london_taxi_register") 9 | 10 | def main(model_metadata, model_name, score_report, build_reference): 11 | try: 12 | run_file = open(args.model_metadata) 13 | model_metadata = json.load(run_file) 14 | run_uri = model_metadata["run_uri"] 15 | 16 | 17 | score_file = open(Path(args.score_report) / "score.txt") 18 | score_data = json.load(score_file) 19 | cod = score_data["cod"] 20 | mse = score_data["mse"] 21 | coff = score_data["coff"] 22 | 23 | model_version = mlflow.register_model(run_uri, model_name) 24 | 25 | client = mlflow.MlflowClient() 26 | client.set_model_version_tag( 27 | name=model_name, version=model_version.version, key="mse", value=mse 28 | ) 29 | client.set_model_version_tag( 30 | name=model_name, version=model_version.version, key="coff", value=coff 31 | ) 32 | client.set_model_version_tag( 33 | name=model_name, version=model_version.version, key="cod", value=cod 34 | ) 35 | client.set_model_version_tag( 36 | name=model_name, 37 | version=model_version.version, 38 | key="build_id", 39 | value=build_reference, 40 | ) 41 | 42 | logger.info(model_version) 43 | except Exception as ex: 44 | logger.exception("Exception in register model") 45 | raise 46 | finally: 47 | run_file.close() 48 | 49 | 50 | if __name__ == "__main__": 51 | parser = 
argparse.ArgumentParser("register_model") 52 | parser.add_argument( 53 | "--model_metadata", 54 | type=str, 55 | help="model metadata on Machine Learning Workspace", 56 | ) 57 | parser.add_argument("--model_name", type=str, help="model name to be registered") 58 | parser.add_argument("--score_report", type=str, help="score report for the model") 59 | parser.add_argument( 60 | "--build_reference", 61 | type=str, 62 | help="Original AzDo build id that initiated experiment", 63 | ) 64 | 65 | args = parser.parse_args() 66 | 67 | logger.info(args.model_metadata) 68 | logger.info(args.model_name) 69 | logger.info(args.score_report) 70 | logger.info(args.build_reference) 71 | 72 | main(args.model_metadata, args.model_name, args.score_report, args.build_reference) 73 | -------------------------------------------------------------------------------- /src/london_src/score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/score/__init__.py -------------------------------------------------------------------------------- /src/london_src/score/score.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import os 4 | from pathlib import Path 5 | from sklearn.linear_model import LinearRegression 6 | import pickle 7 | from sklearn.metrics import mean_squared_error, r2_score 8 | import mlflow 9 | import json 10 | from mlops.common.logger import get_logger 11 | 12 | logger = get_logger("london_taxi_score") 13 | 14 | def main(predictions, model, score_report): 15 | logger.info("hello scoring world...") 16 | 17 | 18 | lines = [ 19 | f"Model path: {model}", 20 | f"Predictions path: {predictions}", 21 | f"Scoring output path: {score_report}", 22 | ] 23 | 24 | for line in lines: 25 | logger.info(line) 26 | 27 | # Load the test data with 
predicted values 28 | 29 | logger.info("mounted_path files: ") 30 | arr = os.listdir(predictions) 31 | 32 | logger.info(arr) 33 | df_list = [] 34 | for filename in arr: 35 | logger.info("reading file: %s ..." % filename) 36 | with open(os.path.join(predictions, filename), "r") as handle: 37 | input_df = pd.read_csv((Path(predictions) / filename)) 38 | df_list.append(input_df) 39 | 40 | test_data = df_list[0] 41 | 42 | # Load the model from input port 43 | model = pickle.load(open((Path(model) / "model.sav"), "rb")) 44 | write_results(model, predictions, test_data, score_report) 45 | 46 | 47 | # Print the results of scoring the predictions against actual values in the test data 48 | def write_results(model, predictions, test_data, score_report): 49 | # The coefficients 50 | logger.info("Coefficients: \n", model.coef_) 51 | 52 | actuals = test_data["actual_cost"] 53 | predictions = test_data["predicted_cost"] 54 | 55 | mse = mean_squared_error(actuals, predictions) 56 | r2 = r2_score(actuals, predictions) 57 | 58 | mlflow.log_metric("scoring_mse", mse) 59 | mlflow.log_metric("scoring_r2", r2) 60 | 61 | # The mean squared error 62 | logger.info("Mean squared error: %.2f" % mse) 63 | # The coefficient of determination: 1 is perfect prediction 64 | logger.info("Coefficient of determination: %.2f" % r2) 65 | logger.info("Model: ", model) 66 | 67 | # logger.info score report to a text file 68 | model_score = { 69 | "mse": mean_squared_error(actuals, predictions), 70 | "coff": str(model.coef_), 71 | "cod": r2_score(actuals, predictions), 72 | } 73 | with open((Path(score_report) / "score.txt"), "w") as json_file: 74 | json.dump(model_score, json_file, indent=4) 75 | 76 | 77 | if __name__ == "__main__": 78 | parser = argparse.ArgumentParser("score") 79 | parser.add_argument( 80 | "--predictions", type=str, help="Path of predictions and actual data" 81 | ) 82 | parser.add_argument("--model", type=str, help="Path to model") 83 | parser.add_argument("--score_report", type=str, 
help="Path to score report") 84 | 85 | args = parser.parse_args() 86 | 87 | predictions = args.predictions 88 | model = args.model 89 | score_report = args.score_report 90 | 91 | main(predictions, model, score_report) 92 | -------------------------------------------------------------------------------- /src/london_src/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/train/__init__.py -------------------------------------------------------------------------------- /src/london_src/train/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | import os 4 | import pandas as pd 5 | from sklearn.linear_model import LinearRegression 6 | from sklearn.model_selection import train_test_split 7 | import pickle 8 | import mlflow 9 | import json 10 | from mlops.common.logger import get_logger 11 | 12 | logger = get_logger("london_taxi_train") 13 | 14 | 15 | def main(training_data, test_data, model_output, model_metadata): 16 | logger.info("Hello training world...") 17 | 18 | 19 | lines = [ 20 | f"Training data path: {training_data}", 21 | f"Test data path: {test_data}", 22 | f"Model output path: {model_output}", 23 | f"Model metadata path: {model_metadata}", 24 | ] 25 | 26 | for line in lines: 27 | logger.info(line) 28 | 29 | logger.info("mounted_path files: ") 30 | arr = os.listdir(training_data) 31 | logger.info(arr) 32 | 33 | df_list = [] 34 | for filename in arr: 35 | logger.info("reading file: %s ..." 
% filename) 36 | with open(os.path.join(training_data, filename), "r") as handle: 37 | input_df = pd.read_csv((Path(training_data) / filename)) 38 | df_list.append(input_df) 39 | 40 | train_data = df_list[0] 41 | logger.info(train_data.columns) 42 | 43 | trainX, testX, trainy, testy = split(train_data) 44 | write_test_data(testX, testy) 45 | train_model(trainX, trainy) 46 | 47 | 48 | def split(train_data): 49 | # Split the data into input(X) and output(y) 50 | y = train_data["cost"] 51 | X = train_data[ 52 | [ 53 | "distance", 54 | "dropoff_latitude", 55 | "dropoff_longitude", 56 | "passengers", 57 | "pickup_latitude", 58 | "pickup_longitude", 59 | "store_forward", 60 | "vendor", 61 | "pickup_weekday", 62 | "pickup_month", 63 | "pickup_monthday", 64 | "pickup_hour", 65 | "pickup_minute", 66 | "pickup_second", 67 | "dropoff_weekday", 68 | "dropoff_month", 69 | "dropoff_monthday", 70 | "dropoff_hour", 71 | "dropoff_minute", 72 | "dropoff_second", 73 | ] 74 | ] 75 | 76 | # Split the data into train and test sets 77 | trainX, testX, trainy, testy = train_test_split( 78 | X, y, test_size=0.3, random_state=42 79 | ) 80 | logger.info(trainX.shape) 81 | logger.info(trainX.columns) 82 | 83 | return trainX, testX, trainy, testy 84 | 85 | 86 | def train_model(trainX, trainy): 87 | mlflow.autolog() 88 | # Train a Linear Regression Model with the train set 89 | with mlflow.start_run() as run: 90 | model = LinearRegression().fit(trainX, trainy) 91 | logger.info(model.score(trainX, trainy)) 92 | 93 | # Output the model, metadata and test data 94 | run_id = mlflow.active_run().info.run_id 95 | model_uri = f"runs:/{run_id}/model" 96 | model_data = {"run_id": run.info.run_id, "run_uri": model_uri} 97 | with open(args.model_metadata, "w") as json_file: 98 | json.dump(model_data, json_file, indent=4) 99 | 100 | pickle.dump(model, open((Path(args.model_output) / "model.sav"), "wb")) 101 | 102 | 103 | def write_test_data(testX, testy): 104 | testX["cost"] = testy 105 | 
logger.info(testX.shape) 106 | testX.to_csv((Path(args.test_data) / "test_data.csv")) 107 | 108 | 109 | if __name__ == "__main__": 110 | parser = argparse.ArgumentParser("train") 111 | parser.add_argument("--training_data", type=str, help="Path to training data") 112 | parser.add_argument("--test_data", type=str, help="Path to test data") 113 | parser.add_argument("--model_output", type=str, help="Path of output model") 114 | parser.add_argument("--model_metadata", type=str, help="Path of model metadata") 115 | 116 | args = parser.parse_args() 117 | 118 | training_data = args.training_data 119 | test_data = args.test_data 120 | model_output = args.model_output 121 | model_metadata = args.model_metadata 122 | 123 | main(training_data, test_data, model_output, model_metadata) 124 | -------------------------------------------------------------------------------- /src/london_src/transform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/transform/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/predict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/predict/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/predict/predict.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import os 4 | from pathlib import Path 5 | from sklearn.linear_model import LinearRegression 6 | import pickle 7 | from mlops.common.logger import get_logger 8 | 9 | logger = get_logger("myc_taxi_predict") 10 | 11 | def main(model_input, test_data, prediction_path): 12 | lines = [ 13 | f"Model path: {model_input}", 14 | f"Test data path: {test_data}", 15 | f"Predictions path: {prediction_path}", 16 | ] 17 | 18 | for line in lines: 19 | logger.info(line) 20 | 21 | testX, testy = load_test_data(test_data) 22 | predict(testX, testy, model_input, prediction_path) 23 | 24 | 25 | # Load and split the test data 26 | def load_test_data(test_data): 27 | logger.info("mounted_path files: ") 28 | arr = os.listdir(test_data) 29 | 30 | logger.info(arr) 31 | df_list = [] 32 | for filename in arr: 33 | logger.info("reading file: %s ..." % filename) 34 | with open(os.path.join(test_data, filename), "r") as handle: 35 | input_df = pd.read_csv((Path(test_data) / filename)) 36 | df_list.append(input_df) 37 | 38 | test_data = df_list[0] 39 | testy = test_data["cost"] 40 | testX = test_data[ 41 | [ 42 | "distance", 43 | "dropoff_latitude", 44 | "dropoff_longitude", 45 | "passengers", 46 | "pickup_latitude", 47 | "pickup_longitude", 48 | "store_forward", 49 | "vendor", 50 | "pickup_weekday", 51 | "pickup_month", 52 | "pickup_monthday", 53 | "pickup_hour", 54 | "pickup_minute", 55 | "pickup_second", 56 | "dropoff_weekday", 57 | "dropoff_month", 58 | "dropoff_monthday", 59 | "dropoff_hour", 60 | "dropoff_minute", 61 | "dropoff_second", 62 | ] 63 | ] 64 | logger.info(testX.shape) 65 | logger.info(testX.columns) 66 | return testX, testy 67 | 68 | 69 | def predict(testX, testy, model_input, prediction_path): 70 | # Load the model from input port 71 | model = pickle.load(open((Path(model_input) / "model.sav"), "rb")) 72 | 73 | # Make predictions on testX data and 
record them in a column named predicted_cost 74 | predictions = model.predict(testX) 75 | testX["predicted_cost"] = predictions 76 | logger.info(testX.shape) 77 | 78 | # Compare predictions to actuals (testy) 79 | output_data = pd.DataFrame(testX) 80 | output_data["actual_cost"] = testy 81 | 82 | # Save the output data with feature columns, predicted cost, and actual cost in csv file 83 | output_data = output_data.to_csv((Path(prediction_path) / "predictions.csv")) 84 | 85 | if __name__ == "__main__": 86 | parser = argparse.ArgumentParser("predict") 87 | parser.add_argument("--model_input", type=str, help="Path of input model") 88 | parser.add_argument("--test_data", type=str, help="Path to test data") 89 | parser.add_argument("--predictions", type=str, help="Path of predictions") 90 | 91 | args = parser.parse_args() 92 | 93 | logger.info("hello scoring world...") 94 | 95 | model_input = args.model_input 96 | test_data = args.test_data 97 | prediction_path = args.predictions 98 | main(model_input, test_data, prediction_path) 99 | -------------------------------------------------------------------------------- /src/nyc_src/prep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/prep/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/prep/prep.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from typing_extensions import Concatenate 4 | from uuid import uuid4 5 | from datetime import datetime 6 | import os 7 | import pandas as pd 8 | from sklearn.linear_model import LinearRegression 9 | from sklearn.model_selection import train_test_split 10 | import pickle 11 | from mlops.common.logger import get_logger 12 | 13 | logger = get_logger("nyc_taxi_prep") 14 | 
15 | 16 | def main(raw_data, prep_data): 17 | logger.info("hello training world...") 18 | 19 | lines = [ 20 | f"Raw data path: {raw_data}", 21 | f"Data output path: {prep_data}", 22 | ] 23 | 24 | for line in lines: 25 | logger.info(line) 26 | 27 | logger.info("mounted_path files: ") 28 | arr = os.listdir(raw_data) 29 | logger.info(arr) 30 | 31 | df_list = [] 32 | for filename in arr: 33 | logger.info("reading file: %s ..." % filename) 34 | with open(os.path.join(raw_data, filename), "r") as handle: 35 | input_df = pd.read_csv((Path(raw_data) / filename)) 36 | df_list.append(input_df) 37 | 38 | # Prep the green and yellow taxi data 39 | green_data = df_list[0] 40 | yellow_data = df_list[1] 41 | 42 | data_prep(green_data, yellow_data) 43 | 44 | 45 | def data_prep(green_data, yellow_data): 46 | # Define useful columns needed for the Azure Machine Learning NYC Taxi tutorial 47 | 48 | useful_columns = str( 49 | [ 50 | "cost", 51 | "distance", 52 | "dropoff_datetime", 53 | "dropoff_latitude", 54 | "dropoff_longitude", 55 | "passengers", 56 | "pickup_datetime", 57 | "pickup_latitude", 58 | "pickup_longitude", 59 | "store_forward", 60 | "vendor", 61 | ] 62 | ).replace(",", ";") 63 | logger.info(useful_columns) 64 | 65 | # Rename columns as per Azure Machine Learning NYC Taxi tutorial 66 | green_columns = str( 67 | { 68 | "vendorID": "vendor", 69 | "lpepPickupDatetime": "pickup_datetime", 70 | "lpepDropoffDatetime": "dropoff_datetime", 71 | "storeAndFwdFlag": "store_forward", 72 | "pickupLongitude": "pickup_longitude", 73 | "pickupLatitude": "pickup_latitude", 74 | "dropoffLongitude": "dropoff_longitude", 75 | "dropoffLatitude": "dropoff_latitude", 76 | "passengerCount": "passengers", 77 | "fareAmount": "cost", 78 | "tripDistance": "distance", 79 | } 80 | ).replace(",", ";") 81 | 82 | yellow_columns = str( 83 | { 84 | "vendorID": "vendor", 85 | "tpepPickupDateTime": "pickup_datetime", 86 | "tpepDropoffDateTime": "dropoff_datetime", 87 | "storeAndFwdFlag": "store_forward", 
88 | "startLon": "pickup_longitude", 89 | "startLat": "pickup_latitude", 90 | "endLon": "dropoff_longitude", 91 | "endLat": "dropoff_latitude", 92 | "passengerCount": "passengers", 93 | "fareAmount": "cost", 94 | "tripDistance": "distance", 95 | } 96 | ).replace(",", ";") 97 | 98 | logger.info("green_columns: " + green_columns) 99 | logger.info("yellow_columns: " + yellow_columns) 100 | 101 | green_data_clean = cleanseData(green_data, green_columns, useful_columns) 102 | yellow_data_clean = cleanseData(yellow_data, yellow_columns, useful_columns) 103 | 104 | # Append yellow data to green data 105 | combined_df = pd.concat([green_data_clean,yellow_data_clean], ignore_index=True) 106 | combined_df.reset_index(inplace=True, drop=True) 107 | 108 | output_green = green_data_clean.to_csv( 109 | os.path.join(prep_data, "green_prep_data.csv") 110 | ) 111 | output_yellow = yellow_data_clean.to_csv( 112 | os.path.join(prep_data, "yellow_prep_data.csv") 113 | ) 114 | merged_data = combined_df.to_csv(os.path.join(prep_data, "merged_data.csv")) 115 | 116 | logger.info("Finish") 117 | 118 | 119 | # These functions ensure that null data is removed from the dataset, 120 | # which will help increase machine learning model accuracy. 
121 | 122 | 123 | def get_dict(dict_str): 124 | pairs = dict_str.strip("{}").split(";") 125 | new_dict = {} 126 | for pair in pairs: 127 | logger.info(pair) 128 | key, value = pair.strip().split(":") 129 | new_dict[key.strip().strip("'")] = value.strip().strip("'") 130 | return new_dict 131 | 132 | 133 | def cleanseData(data, columns, useful_columns): 134 | useful_columns = [ 135 | s.strip().strip("'") for s in useful_columns.strip("[]").split(";") 136 | ] 137 | new_columns = get_dict(columns) 138 | 139 | new_df = (data.dropna(how="all").rename(columns=new_columns))[useful_columns] 140 | 141 | new_df.reset_index(inplace=True, drop=True) 142 | return new_df 143 | 144 | 145 | if __name__ == "__main__": 146 | parser = argparse.ArgumentParser() 147 | parser.add_argument( 148 | "--raw_data", 149 | type=str, 150 | default="../data/raw_data", 151 | help="Path to raw data", 152 | ) 153 | parser.add_argument( 154 | "--prep_data", type=str, default="../data/prep_data", help="Path to prep data" 155 | ) 156 | 157 | args = parser.parse_args() 158 | raw_data = args.raw_data 159 | prep_data = args.prep_data 160 | 161 | main(raw_data, prep_data) 162 | -------------------------------------------------------------------------------- /src/nyc_src/register/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/register/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/register/register.py: -------------------------------------------------------------------------------- 1 | import mlflow 2 | import argparse 3 | import os 4 | import json 5 | from pathlib import Path 6 | 7 | from mlops.common.logger import get_logger 8 | 9 | logger = get_logger("nyc_taxi_register") 10 | 11 | def main(model_metadata, model_name, score_report, build_reference): 12 | try: 13 | run_file = 
open(args.model_metadata) 14 | model_metadata = json.load(run_file) 15 | run_uri = model_metadata["run_uri"] 16 | 17 | score_file = open(Path(args.score_report) / "score.txt") 18 | score_data = json.load(score_file) 19 | cod = score_data["cod"] 20 | mse = score_data["mse"] 21 | coff = score_data["coff"] 22 | 23 | model_version = mlflow.register_model(run_uri, model_name) 24 | 25 | client = mlflow.MlflowClient() 26 | client.set_model_version_tag( 27 | name=model_name, version=model_version.version, key="mse", value=mse 28 | ) 29 | client.set_model_version_tag( 30 | name=model_name, version=model_version.version, key="coff", value=coff 31 | ) 32 | client.set_model_version_tag( 33 | name=model_name, version=model_version.version, key="cod", value=cod 34 | ) 35 | client.set_model_version_tag( 36 | name=model_name, 37 | version=model_version.version, 38 | key="build_id", 39 | value=build_reference, 40 | ) 41 | 42 | logger.info(model_version) 43 | except Exception as ex: 44 | logger.info(ex) 45 | raise 46 | finally: 47 | run_file.close() 48 | 49 | 50 | if __name__ == "__main__": 51 | parser = argparse.ArgumentParser("register_model") 52 | parser.add_argument( 53 | "--model_metadata", 54 | type=str, 55 | help="model metadata on Machine Learning Workspace", 56 | ) 57 | parser.add_argument("--model_name", type=str, help="model name to be registered") 58 | parser.add_argument("--score_report", type=str, help="score report for the model") 59 | parser.add_argument( 60 | "--build_reference", 61 | type=str, 62 | help="Original AzDo build id that initiated experiment", 63 | ) 64 | 65 | args = parser.parse_args() 66 | 67 | logger.info(args.model_metadata) 68 | logger.info(args.model_name) 69 | logger.info(args.score_report) 70 | logger.info(args.build_reference) 71 | 72 | main(args.model_metadata, args.model_name, args.score_report, args.build_reference) 73 | -------------------------------------------------------------------------------- /src/nyc_src/score/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/score/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/score/score.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import os 4 | from pathlib import Path 5 | from sklearn.linear_model import LinearRegression 6 | import pickle 7 | from sklearn.metrics import mean_squared_error, r2_score 8 | import mlflow 9 | import json 10 | 11 | from mlops.common.logger import get_logger 12 | 13 | logger = get_logger("nyc_taxi_score") 14 | 15 | def main(predictions, model, score_report): 16 | logger.info("hello scoring world...") 17 | 18 | lines = [ 19 | f"Model path: {model}", 20 | f"Predictions path: {predictions}", 21 | f"Scoring output path: {score_report}", 22 | ] 23 | 24 | for line in lines: 25 | logger.info(line) 26 | 27 | # Load the test data with predicted values 28 | 29 | logger.info("mounted_path files: ") 30 | arr = os.listdir(predictions) 31 | 32 | logger.info(arr) 33 | df_list = [] 34 | for filename in arr: 35 | logger.info("reading file: %s ..." 
% filename) 36 | with open(os.path.join(predictions, filename), "r") as handle: 37 | input_df = pd.read_csv((Path(predictions) / filename)) 38 | df_list.append(input_df) 39 | 40 | test_data = df_list[0] 41 | 42 | # Load the model from input port 43 | model = pickle.load(open((Path(model) / "model.sav"), "rb")) 44 | write_results(model, predictions, test_data, score_report) 45 | 46 | 47 | # Print the results of scoring the predictions against actual values in the test data 48 | 49 | 50 | def write_results(model, predictions, test_data, score_report): 51 | # The coefficients 52 | logger.info("Coefficients: \n", model.coef_) 53 | 54 | actuals = test_data["actual_cost"] 55 | predictions = test_data["predicted_cost"] 56 | 57 | mse = mean_squared_error(actuals, predictions) 58 | r2 = r2_score(actuals, predictions) 59 | 60 | mlflow.log_metric("scoring_mse", mse) 61 | mlflow.log_metric("scoring_r2", r2) 62 | 63 | # The mean squared error 64 | logger.info("Mean squared error: %.2f" % mse) 65 | # The coefficient of determination: 1 is perfect prediction 66 | logger.info("Coefficient of determination: %.2f" % r2) 67 | logger.info("Model: ", model) 68 | 69 | # Print score report to a text file 70 | model_score = { 71 | "mse": mean_squared_error(actuals, predictions), 72 | "coff": str(model.coef_), 73 | "cod": r2_score(actuals, predictions), 74 | } 75 | with open((Path(score_report) / "score.txt"), "w") as json_file: 76 | json.dump(model_score, json_file, indent=4) 77 | 78 | 79 | if __name__ == "__main__": 80 | parser = argparse.ArgumentParser("score") 81 | parser.add_argument( 82 | "--predictions", type=str, help="Path of predictions and actual data" 83 | ) 84 | parser.add_argument("--model", type=str, help="Path to model") 85 | parser.add_argument("--score_report", type=str, help="Path to score report") 86 | 87 | args = parser.parse_args() 88 | 89 | predictions = args.predictions 90 | model = args.model 91 | score_report = args.score_report 92 | 93 | main(predictions, model, 
score_report) 94 | -------------------------------------------------------------------------------- /src/nyc_src/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/train/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/train/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | import os 4 | import pandas as pd 5 | from sklearn.linear_model import LinearRegression 6 | from sklearn.model_selection import train_test_split 7 | import pickle 8 | import mlflow 9 | import json 10 | from mlops.common.logger import get_logger 11 | 12 | logger = get_logger("nyc_taxi_train") 13 | 14 | def main(training_data, test_data, model_output, model_metadata): 15 | logger.info("Hello training world...") 16 | 17 | lines = [ 18 | f"Training data path: {training_data}", 19 | f"Test data path: {test_data}", 20 | f"Model output path: {model_output}", 21 | f"Model metadata path: {model_metadata}", 22 | ] 23 | 24 | for line in lines: 25 | logger.info(line) 26 | 27 | logger.info("mounted_path files: ") 28 | arr = os.listdir(training_data) 29 | logger.info(arr) 30 | 31 | df_list = [] 32 | for filename in arr: 33 | logger.info("reading file: %s ..." 
% filename) 34 | with open(os.path.join(training_data, filename), "r") as handle: 35 | input_df = pd.read_csv((Path(training_data) / filename)) 36 | df_list.append(input_df) 37 | 38 | train_data = df_list[0] 39 | logger.info(train_data.columns) 40 | 41 | trainX, testX, trainy, testy = split(train_data) 42 | write_test_data(testX, testy) 43 | train_model(trainX, trainy) 44 | 45 | 46 | def split(train_data): 47 | # Split the data into input(X) and output(y) 48 | y = train_data["cost"] 49 | X = train_data[ 50 | [ 51 | "distance", 52 | "dropoff_latitude", 53 | "dropoff_longitude", 54 | "passengers", 55 | "pickup_latitude", 56 | "pickup_longitude", 57 | "store_forward", 58 | "vendor", 59 | "pickup_weekday", 60 | "pickup_month", 61 | "pickup_monthday", 62 | "pickup_hour", 63 | "pickup_minute", 64 | "pickup_second", 65 | "dropoff_weekday", 66 | "dropoff_month", 67 | "dropoff_monthday", 68 | "dropoff_hour", 69 | "dropoff_minute", 70 | "dropoff_second", 71 | ] 72 | ] 73 | 74 | # Split the data into train and test sets 75 | trainX, testX, trainy, testy = train_test_split( 76 | X, y, test_size=0.3, random_state=42 77 | ) 78 | logger.info(trainX.shape) 79 | logger.info(trainX.columns) 80 | 81 | return trainX, testX, trainy, testy 82 | 83 | 84 | def train_model(trainX, trainy): 85 | mlflow.autolog() 86 | # Train a Linear Regression Model with the train set 87 | with mlflow.start_run() as run: 88 | model = LinearRegression().fit(trainX, trainy) 89 | logger.info(model.score(trainX, trainy)) 90 | 91 | # Output the model, metadata and test data 92 | run_id = mlflow.active_run().info.run_id 93 | model_uri = f"runs:/{run_id}/model" 94 | model_data = {"run_id": run.info.run_id, "run_uri": model_uri} 95 | with open(args.model_metadata, "w") as json_file: 96 | json.dump(model_data, json_file, indent=4) 97 | 98 | pickle.dump(model, open((Path(args.model_output) / "model.sav"), "wb")) 99 | 100 | 101 | def write_test_data(testX, testy): 102 | testX["cost"] = testy 103 | 
logger.info(testX.shape) 104 | testX.to_csv((Path(args.test_data) / "test_data.csv")) 105 | 106 | 107 | if __name__ == "__main__": 108 | parser = argparse.ArgumentParser("train") 109 | parser.add_argument("--training_data", type=str, help="Path to training data") 110 | parser.add_argument("--test_data", type=str, help="Path to test data") 111 | parser.add_argument("--model_output", type=str, help="Path of output model") 112 | parser.add_argument("--model_metadata", type=str, help="Path of model metadata") 113 | 114 | args = parser.parse_args() 115 | 116 | training_data = args.training_data 117 | test_data = args.test_data 118 | model_output = args.model_output 119 | model_metadata = args.model_metadata 120 | 121 | main(training_data, test_data, model_output, model_metadata) 122 | -------------------------------------------------------------------------------- /src/nyc_src/transform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/transform/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/transform/transform.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from uuid import uuid4 4 | from datetime import datetime 5 | import os 6 | import pandas as pd 7 | import numpy as np 8 | 9 | from mlops.common.logger import get_logger 10 | 11 | logger = get_logger("nyc_taxi_transform") 12 | 13 | def main(clean_data, transformed_data): 14 | lines = [ 15 | f"Clean data path: {clean_data}", 16 | f"Transformed data output path: {transformed_data}", 17 | ] 18 | 19 | for line in lines: 20 | logger.info(line) 21 | 22 | logger.info("mounted_path files: ") 23 | arr = os.listdir(clean_data) 24 | logger.info(arr) 25 | 26 | df_list = [] 27 | for filename in arr: 28 | 
logger.info("reading file: %s ..." % filename) 29 | with open(os.path.join(clean_data, filename), "r") as handle: 30 | input_df = pd.read_csv((Path(clean_data) / filename)) 31 | df_list.append(input_df) 32 | 33 | # Transform the data 34 | combined_df = df_list[1] 35 | final_df = transform_data(combined_df) 36 | 37 | # Output data 38 | final_df.to_csv((Path(args.transformed_data) / "transformed_data.csv")) 39 | 40 | 41 | # These functions filter out coordinates for locations that are outside the city border. 42 | 43 | # Filter out coordinates for locations that are outside the city border. 44 | # Chain the column filter commands within the filter() function 45 | # and define the minimum and maximum bounds for each field 46 | 47 | 48 | def transform_data(combined_df): 49 | combined_df = combined_df.astype( 50 | { 51 | "pickup_longitude": "float64", 52 | "pickup_latitude": "float64", 53 | "dropoff_longitude": "float64", 54 | "dropoff_latitude": "float64", 55 | } 56 | ) 57 | 58 | latlong_filtered_df = combined_df[ 59 | (combined_df.pickup_longitude <= -73.72) 60 | & (combined_df.pickup_longitude >= -74.09) 61 | & (combined_df.pickup_latitude <= 40.88) 62 | & (combined_df.pickup_latitude >= 40.53) 63 | & (combined_df.dropoff_longitude <= -73.72) 64 | & (combined_df.dropoff_longitude >= -74.72) 65 | & (combined_df.dropoff_latitude <= 40.88) 66 | & (combined_df.dropoff_latitude >= 40.53) 67 | ] 68 | 69 | latlong_filtered_df.reset_index(inplace=True, drop=True) 70 | 71 | # These functions replace undefined values and rename to use meaningful names. 
72 | replaced_stfor_vals_df = latlong_filtered_df.replace( 73 | {"store_forward": "0"}, {"store_forward": "N"} 74 | ).fillna({"store_forward": "N"}) 75 | 76 | replaced_distance_vals_df = replaced_stfor_vals_df.replace( 77 | {"distance": ".00"}, {"distance": 0} 78 | ).fillna({"distance": 0}) 79 | 80 | normalized_df = replaced_distance_vals_df.astype({"distance": "float64"}) 81 | 82 | # These functions transform the renamed data to be used finally for training. 83 | 84 | # Split the pickup and dropoff date further into the day of the week, day of the month, and month values. 85 | # To get the day of the week value, use the derive_column_by_example() function. 86 | # The function takes an array parameter of example objects that define the input data, 87 | # and the preferred output. The function automatically determines your preferred transformation. 88 | # For the pickup and dropoff time columns, split the time into the hour, minute, and second by using 89 | # the split_column_by_example() function with no example parameter. After you generate the new features, 90 | # use the drop_columns() function to delete the original fields as the newly generated features are preferred. 91 | # Rename the rest of the fields to use meaningful descriptions. 
92 | 93 | temp = pd.DatetimeIndex(normalized_df["pickup_datetime"], dtype="datetime64[ns]") 94 | normalized_df["pickup_date"] = temp.date 95 | normalized_df["pickup_weekday"] = temp.dayofweek 96 | normalized_df["pickup_month"] = temp.month 97 | normalized_df["pickup_monthday"] = temp.day 98 | normalized_df["pickup_time"] = temp.time 99 | normalized_df["pickup_hour"] = temp.hour 100 | normalized_df["pickup_minute"] = temp.minute 101 | normalized_df["pickup_second"] = temp.second 102 | 103 | temp = pd.DatetimeIndex(normalized_df["dropoff_datetime"], dtype="datetime64[ns]") 104 | normalized_df["dropoff_date"] = temp.date 105 | normalized_df["dropoff_weekday"] = temp.dayofweek 106 | normalized_df["dropoff_month"] = temp.month 107 | normalized_df["dropoff_monthday"] = temp.day 108 | normalized_df["dropoff_time"] = temp.time 109 | normalized_df["dropoff_hour"] = temp.hour 110 | normalized_df["dropoff_minute"] = temp.minute 111 | normalized_df["dropoff_second"] = temp.second 112 | 113 | del normalized_df["pickup_datetime"] 114 | del normalized_df["dropoff_datetime"] 115 | 116 | normalized_df.reset_index(inplace=True, drop=True) 117 | 118 | logger.info(normalized_df.head) 119 | logger.info(normalized_df.dtypes) 120 | 121 | # Drop the pickup_date, dropoff_date, pickup_time, dropoff_time columns because they're 122 | # no longer needed (granular time features like hour, 123 | # minute and second are more useful for model training). 124 | del normalized_df["pickup_date"] 125 | del normalized_df["dropoff_date"] 126 | del normalized_df["pickup_time"] 127 | del normalized_df["dropoff_time"] 128 | 129 | # Change the store_forward column to binary values 130 | normalized_df["store_forward"] = np.where( 131 | (normalized_df.store_forward == "N"), 0, 1 132 | ) 133 | 134 | # Before you package the dataset, run two final filters on the dataset. 
135 | # To eliminate incorrectly captured data points, 136 | # filter the dataset on records where both the cost and distance variable values are greater than zero. 137 | # This step will significantly improve machine learning model accuracy, 138 | # because data points with a zero cost or distance represent major outliers that throw off prediction accuracy. 139 | 140 | final_df = normalized_df[(normalized_df.distance > 0) & (normalized_df.cost > 0)] 141 | final_df.reset_index(inplace=True, drop=True) 142 | logger.info(final_df.head) 143 | 144 | return final_df 145 | 146 | 147 | if __name__ == "__main__": 148 | parser = argparse.ArgumentParser("transform") 149 | parser.add_argument("--clean_data", type=str, help="Path to prepped data") 150 | parser.add_argument("--transformed_data", type=str, help="Path of output data") 151 | 152 | args = parser.parse_args() 153 | 154 | clean_data = args.clean_data 155 | transformed_data = args.transformed_data 156 | main(clean_data, transformed_data) 157 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/README.md: -------------------------------------------------------------------------------- 1 | # Sample implementation of Model factory and Usecase builder 2 | 3 | ## Context 4 | 5 | This is a sample implementation of the model factory. 6 | 7 | ## Design and Architecture 8 | 9 | - [Design of the Model factory](docs/01-model-factory-design.md) 10 | 11 | ## Adaptation from the accelerator and differences 12 | 13 | ### How this sample implementation is built on top of the accelerator 14 | 15 | - Cloned the accelerator repository into a client specific repository. 16 | - Configured the pipelines in Azure Devops, and made relevant changes to adapt to the client's ways of working. 17 | - Extended the mlops runner to support the client's use-case. 18 | - Inference code for the model was updated related to the specific model. 
19 | 20 | ### Differences in the sample implementation from the accelerator 21 | 22 | - There are few changes to the folder structure in the sample implementation. The accelerator will have functional blocks (ml-ops, model, src) as top level folders. Where as in the sample implementation, we have the models at the top and the functional blocks will be within the specific models. 23 | - In the accelerator, we have the devops pipeline to setup and trigger the `mlops-pipeline` in the common folder. While implementing, we had a scenario to pass in different variables for different models' ml-ops-pipeline, hence this is moved to the model specific folder. 24 | 25 | ## How to use this sample 26 | 27 | - [Instructions to use this sample](docs/02-instructions.md) -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/docs/01-model-factory-design.md: -------------------------------------------------------------------------------- 1 | # Model Factory Design 2 | 3 | This document provides an overview and detailed specifications for the design and implementation of a model factory. 4 | 5 | ## Introduction 6 | 7 | The model factory is a system that automates the end-to-end process of developing, training, deploying, and managing machine learning models. This document outlines the architecture, components, and workflow of the model factory, along with the necessary requirements and considerations. 8 | 9 | ## Features of Model Factory 10 | 11 | - Supports generation of multiple ML Models. 12 | - MLOps pipeline for Data preparation, transformation, Model Training, evaluation, scoring and registration. 13 | - Each ML Model is packaged in an independent Docker Image. 14 | - Model verification before storing the Docker image. 15 | - All Docker images are stored in Azure Container Registry. 16 | - Builds and deploys Smoke Test module on Edge device. 
17 | - Based on Azure ML SDK v2 1.4 18 | 19 | ## Architecture 20 | 21 | It consists of following components: 22 | 23 | 1. **Model Source** 24 | Model source contains the code written by data science team to execute the different stages of model development lifecycle. It is part of `src` folder in the model directory. 25 | 1. **Model Packaging** 26 | This component is used to package ML model into docker container images. It is part of `model` folder in model directory. 27 | 1. **MLOps Pipelines** 28 | It is a placeholder for different ml components used to create MLOps pipelines. It also contains ml ops pipelines code which can be triggered from DevOps pipelines. It is part of `mlops` folder in model directory. 29 | 1. **DevOps Pipelines** 30 | It contains Azure DevOps related pipelines which help in converting the ml code into model container images and push them to the container registry. It is part of `devops` folder in model directory. These pipelines perform various tasks like: 31 | a. Validation of python code(linting, and unit testing) 32 | b. Execution of MLOps pipelines and registering model on AML workspace. 33 | c. Creation of model docker container images. 34 | d. Pushing those docker container images to ACR. 35 | e. Performing Smoke tests on those docker container images. 36 | 1. **Model Repository** 37 | Model Repository is part of AML workspace which store all the models generated by MLOps pipelines. 38 | 1. **Docker Container Repository** 39 | It stores all the model docker container images. 40 | 1. **Notebooks** 41 | These are Jupyter notebooks used by data science team to work and test logic of model generation. These are contained in `notebooks` folder in model_factory folder. 42 | 1. **Common Code** 43 | These are common code which consists of common DevOps template pipelines and also common code for MLOps. It is contained in `common` folder in model_factory directory. 44 | 1. 
**Unit Tests** 45 | These are specific to each model and are contained in `tests` folder in model directory. 46 | 47 | ## Development workflow of Model Factory 48 | 49 | ![development flow](/docs/assets/images/model_factory_design.jpg) 50 | 51 | The above diagram depicts the development workflow of model factory. At a high level following steps are followed in the workflow: 52 | 53 | 1. Data Science team works on development of model on local machine using VSCode remote extension or on AML workspace. This development is generally done in Jupyter notebook. 54 | 1. Once the model is ready and is tested, code from notebooks is brought into `src` folder for that model. 55 | 1. Code is then pushed to dev environment, where MLOps and DevOps pipelines are built and triggered which helps in automating the generation of ML models and pushing ML model docker containers images the dev ACR. 56 | 1. Data Science team validates the model metrics after which the code is push to prod env where pipelines execute basic tests. 57 | 1. Once all steps are successful there is final Gated approval check which allows pushing of these images into prod ACR. 58 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/docs/02-instructions.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This document contains the instructions to use this sample. 4 | 5 | ## Prerequisite 6 | 7 | Following azure resources are required to run this sample: 8 | 9 | 1. Azure AML Workspace: 10 | AML workspace also create following resources: 11 | 1. Application Insights 12 | 1. Azure Keyvault 13 | 1. Azure Blob Storage 14 | 1. Azure Container Registry 15 | 1. Azure VM: It will be used for smoke test. 16 | 1. Azure VM: It will be used for e2e test. 17 | 1. Azure Devops 18 | 1. Service Principal 19 | 20 | ## Steps 21 | 22 | 1. Provision Azure AML workspace. 23 | 1. 
Provision Azure Container registry. 24 | 1. Create Service Connection in Azure Devops providing access to resource group. 25 | 1. Create two variable groups in Azure Devops 26 | 1. mlops_platform_dev_vg 27 | 1. mlops_platform_prod_vg 28 | Add following variables to the both these variable group. Here different ACR's can be used for prod and dev env. 29 | 1. ACR_URL : Azure container registry url 30 | 1. ACR_USERNAME : Azure container registry username 31 | 1. ACR_PASSWORD: Azure container registry password 32 | 1. AZURE_RM_SVC_CONNECTION: Service Connection name 33 | 1. KEYVAULT_NAME: Keyvault name 34 | 1. Create Service Principal 35 | `az ad sp create-for-rbac --name --role owner --scopes /subscriptions//resourceGroups/` 36 | 1. Give Service Principal access to AML workspace and Keyvault 37 | 1. Give Service Connection access to AML worksapce and keyvault. 38 | 1. Add following secrets to Azure Keyvault: 39 | 1. aml-service-principal-id: Service Principal created in step 7 40 | 1. aml-service-principal-secret: Service Principal secret 41 | 1. tenant-id 42 | 1. applicationinsights-connection-string: Application insights connection string 43 | 44 | Following variables are related to ACR. 45 | 1. registry-uri: Dev ACR url 46 | 1. registry-password : Dev ACR password 47 | 1. registry-username: Dev ACR username 48 | Dev ACR 49 | 1. registry-uri-dev: Dev ACR url 50 | 1. registry-username-dev: Dev ACR username 51 | 1. registry-password-dev: Dev ACR password 52 | Prod ACR 53 | 1. registry-uri-prod: Prod ACR url 54 | 1. registry-username-prod: Prod ACR username 55 | 1. registry-password-prod: Prod ACR password 56 | 1. Update model_config(`model_factory\fridge_obj_det\config\model_config.json`) with required values. 57 | 1. Create Azure Pipelines using following yaml files. 58 | 1. Model factory Pipelines: 59 | 1. fridge_obj_det_dev_pipeline: `model_factory\fridge_obj_det\devops\pipelines\fridge_obj_det_dev_pipeline.yml` 60 | 1. 
fridge_obj_det_main_pipeline: 61 | `model_factory\fridge_obj_det\devops\pipelines\fridge_obj_det_main_pipeline.yml` 62 | 63 | 1. Execution of pipelines 64 | 1. Model Factory Pipelines: 65 | - Model factory dev pipeline executes AML pipelines and creates model container docker image and pushes it to ACR. 66 | - Model factory main pipeline executes AML pipeline, creates docker image, and pushes image to ACR. 67 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/docs/assets/images/model_factory_design.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/telco_case_study_implementation/fridge_object_detection/docs/assets/images/model_factory_design.jpg -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/__init__.py: -------------------------------------------------------------------------------- 1 | """TODO Doc string.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/__init__.py: -------------------------------------------------------------------------------- 1 | """Common module for model factory.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/configure_azureml_agent.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: model_type 3 | displayName: "type of model to execute" 4 | 5 | steps: 6 | - task: UsePythonVersion@0 7 | displayName: "Use Python 3.8" 8 | inputs: 9 | versionSpec: "3.8" 10 | 11 | - script: | 12 | python -m venv 
env 13 | source env/bin/activate 14 | displayName: "Create Virtual env" 15 | 16 | - task: AzureCLI@2 17 | displayName: Install Job Requirements 18 | inputs: 19 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 20 | scriptType: bash 21 | scriptLocation: inlineScript 22 | inlineScript: | 23 | set -e # fail on error 24 | source env/bin/activate 25 | python -m pip install --upgrade pip 26 | pip install -r $(System.DefaultWorkingDirectory)/model_factory/${{parameters.model_type}}/devops/pipelines/requirements/execute_job_requirements.txt 27 | az version 28 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/execute_mlops_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: script_parameter 3 | type: string 4 | 5 | steps: 6 | - task: AzureKeyVault@2 7 | continueOnError: false 8 | inputs: 9 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 10 | KeyVaultName: $(KEYVAULT_NAME) 11 | SecretsFilter: "*" 12 | RunAsPreJob: false 13 | - task: AzureCLI@2 14 | name: Execute_ml_Job_Pipeline 15 | displayName: Execute Azure ML pipeline job 16 | continueOnError: false 17 | inputs: 18 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 19 | scriptType: bash 20 | workingDirectory: $(System.DefaultWorkingDirectory)/model_factory 21 | scriptLocation: inlineScript 22 | inlineScript: | 23 | source ../env/bin/activate 24 | export AZURE_CLIENT_ID=$(aml-service-principal-id) 25 | export AZURE_TENANT_ID=$(tenant-id) 26 | export AZURE_CLIENT_SECRET=$(aml-service-principal-secret) 27 | which python 28 | ${{parameters.script_parameter}} 29 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/experiment_variables.yml: 
-------------------------------------------------------------------------------- 1 | variables: 2 | - name: ML_MODEL_CONFIG_NAME 3 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ML_MODEL_CONFIG_NAME'] ] 4 | - name: KEYVAULT_NAME 5 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.KEYVAULT_NAME'] ] 6 | - name: ML_SCOPE_SVC_CONNECTION 7 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ML_SCOPE_SVC_CONNECTION'] ] 8 | - name: HOST_PORT_NUMBER 9 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.HOST_PORT_NUMBER'] ] 10 | - name: EXPERIMENT_BASE_NAME 11 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.EXPERIMENT_BASE_NAME'] ] 12 | - name: ENVIRONMENT_NAME 13 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ENVIRONMENT_NAME'] ] 14 | - name: ENV_BASE_IMAGE_NAME 15 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ENV_BASE_IMAGE_NAME'] ] 16 | - name: DISPLAY_BASE_NAME 17 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.DISPLAY_BASE_NAME'] ] 18 | - name: CONDA_PATH 19 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CONDA_PATH'] ] 20 | - name: CLUSTER_SIZE 21 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_SIZE'] ] 22 | - name: CLUSTER_REGION 23 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_REGION'] ] 24 | - name: CLUSTER_NAME 25 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_NAME'] ] 26 | - name: AZURE_RM_SVC_CONNECTION 27 
| value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.AZURE_RM_SVC_CONNECTION'] ] 28 | - name: MODEL_BASE_NAME 29 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.MODEL_BASE_NAME'] ] 30 | - name: REGISTRY_NAME 31 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.REGISTRY_NAME'] ] 32 | - name: RESOURCE_GROUP_NAME 33 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.RESOURCE_GROUP_NAME'] ] 34 | - name: SCORE_FILE_NAME 35 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.SCORE_FILE_NAME'] ] 36 | - name: WORKSPACE_NAME 37 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.WORKSPACE_NAME'] ] 38 | - name: EXPERIMENT_NAME 39 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.EXPERIMENT_NAME'] ] 40 | - name: DISPLAY_NAME 41 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.DISPLAY_NAME'] ] 42 | - name: MODEL_NAME 43 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.MODEL_NAME'] ] 44 | - name: HEALTHCHECK_TIMEOUT 45 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.HEALTHCHECK_TIMEOUT'] ] 46 | - name: AML_ARTIFACT_PATH 47 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.AML_ARTIFACT_PATH'] ] -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/get_connection_details.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - task: AzureCLI@2 3 | name: 
retrieveAzureServiceConnection 4 | displayName: Retrieve Azure Service Connection 5 | inputs: 6 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 7 | scriptLocation: inlineScript 8 | scriptType: bash 9 | inlineScript: | 10 | export subscriptionId=$(az account show --query id -o tsv) 11 | echo "##vso[task.setvariable variable=SUBSCRIPTION_ID]$subscriptionId" 12 | echo "##vso[task.setvariable variable=TENANT_ID]$tenantId" 13 | addSpnToEnvironment: true -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/image_generation_template.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | default: "dev" 5 | - name: model_type 6 | displayName: "type of model to execute" 7 | - name: model_name 8 | displayName: "model name" 9 | default: $(MODEL_NAME) 10 | - name: dependencies 11 | default: "" 12 | 13 | jobs: 14 | - job: build_docker_image 15 | timeoutInMinutes: 0 16 | dependsOn: ${{ parameters.dependencies }} 17 | steps: 18 | - task: UsePythonVersion@0 19 | displayName: "install python 3.8" 20 | continueOnError: false 21 | inputs: 22 | versionSpec: "3.8" 23 | 24 | - task: AzureKeyVault@2 25 | continueOnError: false 26 | inputs: 27 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 28 | KeyVaultName: $(KEYVAULT_NAME) 29 | SecretsFilter: "*" 30 | RunAsPreJob: false 31 | 32 | - script: | 33 | python -m venv env 34 | displayName: "Create Virtual env" 35 | 36 | - task: AzureCLI@2 37 | displayName: "setup the build server" 38 | continueOnError: false 39 | inputs: 40 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 41 | scriptType: bash 42 | workingDirectory: $(system.DefaultWorkingDirectory) 43 | scriptLocation: inlineScript 44 | inlineScript: | 45 | source env/bin/activate 46 | python -m pip install --upgrade pip 47 | pip install -r 
model_factory/${{parameters.model_type}}/devops/pipelines/requirements/execute_job_requirements.txt 48 | az extension add -n ml -y 49 | az upgrade --yes 50 | az config set extension.use_dynamic_install=yes_without_prompt 51 | 52 | - task: AzureCLI@2 53 | displayName: Download Model artifacts from AzureML Model Registry 54 | continueOnError: false 55 | inputs: 56 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 57 | scriptType: bash 58 | scriptLocation: inlineScript 59 | workingDirectory: $(system.DefaultWorkingDirectory) 60 | inlineScript: | 61 | set -e # fail on error 62 | source env/bin/activate 63 | az ml model download --name ${{parameters.model_name}} --version $(az ml model list --name ${{parameters.model_name}} --resource-group $(RESOURCE_GROUP_NAME) --workspace-name $(WORKSPACE_NAME) --query "[0].version" --output tsv) --download-path model_factory/${{parameters.model_type}}/model/model_download -g $(RESOURCE_GROUP_NAME) -w $(WORKSPACE_NAME) 64 | tar -xvzf model_factory/${{parameters.model_type}}/model/model_download/${{parameters.model_name}}/model_artifacts.tar.gz -C model_factory/${{parameters.model_type}}/model/model_artifacts 65 | 66 | - task: AzureCLI@2 67 | displayName: Build Docker Image with model artifacts 68 | continueOnError: false 69 | inputs: 70 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 71 | scriptType: bash 72 | workingDirectory: $(system.DefaultWorkingDirectory)/model_factory/${{parameters.model_type}}/model 73 | scriptLocation: inlineScript 74 | inlineScript: | 75 | set -e # fail on error 76 | # docker buildx use mybuilder 77 | # docker buildx inspect --bootstrap 78 | docker login "$(ACR_URL)" -u "$(ACR_USERNAME)" -p "$(ACR_PASSWORD)" 79 | docker buildx build --platform linux/amd64 -t $(ACR_URL)/$(MODEL_NAME):$(Build.BuildNumber) . --push 80 | # docker buildx build --platform linux/amd64 -t $(ACR_URL)/$(MODEL_NAME):$(Build.BuildNumber) . 
--push 81 | docker images 82 | echo "##vso[task.setvariable variable=ML_MODEL_DOCKER_IMAGE_NAME;isOutput=true;]$(MODEL_NAME)" 83 | echo "##vso[task.setvariable variable=ML_MODEL_DOCKER_IMAGE_VERSION;isOutput=true;]$(Build.BuildNumber)" 84 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/platform_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | default: "dev" 5 | - name: model_type 6 | displayName: "type of model to execute" 7 | - name: runMlops 8 | displayName: "Run MLOps" 9 | default: true 10 | 11 | stages: 12 | - stage: execute_training_job 13 | displayName: execute_training_job 14 | dependsOn: 15 | - variable_generation 16 | - build_validation 17 | variables: 18 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 19 | jobs: 20 | - job: Execute_ml_Job_Pipeline 21 | condition: eq('${{ parameters.runMlops }}', true) 22 | timeoutInMinutes: 360 23 | steps: 24 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/get_connection_details.yml 25 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/configure_azureml_agent.yml 26 | parameters: 27 | model_type: ${{parameters.model_type}} 28 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/${{parameters.model_type}}/devops/pipelines/${{parameters.model_type}}_mlops_pipeline.yml 29 | parameters: 30 | model_type: ${{parameters.model_type}} 31 | model_name: $(MODEL_NAME) 32 | 33 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/image_generation_template.yml 34 | parameters: 35 | exec_environment: 
${{parameters.exec_environment}} 36 | model_type: ${{parameters.model_type}} 37 | dependencies: "Execute_ml_Job_Pipeline" 38 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/platform_main_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | default: "dev" 5 | - name: model_type 6 | displayName: "type of model to execute" 7 | - name: TARGET_CONDITION 8 | displayName: "Target Condition for the deployment" 9 | - name: runMlops 10 | displayName: "Run MLOps" 11 | default: true 12 | 13 | stages: 14 | - stage: execute_training_job 15 | displayName: execute_training_job 16 | dependsOn: 17 | - variable_generation 18 | - build_validation 19 | variables: 20 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 21 | - group: mlops_platform_dev_vg 22 | jobs: 23 | - job: Execute_ml_Job_Pipeline 24 | condition: eq('${{ parameters.runMlops }}', true) 25 | timeoutInMinutes: 360 26 | steps: 27 | - task: UsePythonVersion@0 28 | displayName: "Use Python 3.8" 29 | inputs: 30 | versionSpec: "3.8" 31 | addToPath: true 32 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/get_connection_details.yml 33 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/configure_azureml_agent.yml 34 | parameters: 35 | model_type: ${{parameters.model_type}} 36 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/${{parameters.model_type}}/devops/pipelines/${{parameters.model_type}}_mlops_pipeline.yml 37 | parameters: 38 | model_type: ${{parameters.model_type}} 39 | model_name: "${{parameters.model_type}}_prod_master" 40 | 41 | - template: 
${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/image_generation_template.yml 42 | parameters: 43 | exec_environment: ${{ parameters.exec_environment }} 44 | model_type: ${{ parameters.model_type }} 45 | model_name: "${{parameters.model_type}}_prod_master" 46 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/variables_template.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | - name: model_type 5 | displayName: "type of model to execute" 6 | 7 | stages: 8 | - stage: variable_generation 9 | jobs: 10 | - job: load_config_variables 11 | steps: 12 | - powershell: | 13 | $json = Get-Content -Raw -Path '$(System.DefaultWorkingDirectory)/model_factory/${{parameters.model_type}}/config/model_config.json' | ConvertFrom-Json 14 | $firstElement = $json.models | Where-Object {($_.ML_MODEL_CONFIG_NAME -eq "${{ parameters.model_type }}") -and ($_.ENV_NAME -eq "${{ parameters.exec_environment }}")} | Select-Object -First 1 15 | 16 | Write-Output $firstElement.KEYVAULT_NAME 17 | 18 | foreach ($property in $firstElement.PSObject.Properties) { 19 | $pname = $property.Name 20 | $pvalue = $property.Value 21 | Write-Output "##vso[task.setvariable variable=$pname;isoutput=true]$pvalue" 22 | } 23 | 24 | $EXPERIMENT_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.EXPERIMENT_BASE_NAME)" + "_" + "${{parameters.exec_environment}}" + "_" + "$(Build.SourceBranchName)" 25 | Write-Output "##vso[task.setvariable variable=EXPERIMENT_NAME;isoutput=true]$EXPERIMENT_NAME" 26 | 27 | $DISPLAY_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.DISPLAY_BASE_NAME)" + "_" + "${{parameters.exec_environment}}" + "_" + "$(Build.BuildID)" 28 | Write-Output "##vso[task.setvariable 
variable=DISPLAY_NAME;isoutput=true]$DISPLAY_NAME" 29 | Write-Output $DISPLAY_NAME 30 | $MODEL_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.MODEL_BASE_NAME)" + "_" + "${{parameters.exec_environment}}" + "_" + "$(Build.SourceBranchName)" 31 | Write-Output "##vso[task.setvariable variable=MODEL_NAME;isoutput=true]$MODEL_NAME" 32 | name: loading_model_config 33 | 34 | - job: validate_assign_variables 35 | dependsOn: load_config_variables 36 | variables: 37 | - name: ML_MODEL_CONFIG_NAME 38 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ML_MODEL_CONFIG_NAME'] ] 39 | - name: KEYVAULT_NAME 40 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.KEYVAULT_NAME'] ] 41 | - name: ML_SCOPE_SVC_CONNECTION 42 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ML_SCOPE_SVC_CONNECTION'] ] 43 | - name: HOST_PORT_NUMBER 44 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.HOST_PORT_NUMBER'] ] 45 | - name: EXPERIMENT_BASE_NAME 46 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.EXPERIMENT_BASE_NAME'] ] 47 | - name: ENVIRONMENT_NAME 48 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ENVIRONMENT_NAME'] ] 49 | - name: ENV_BASE_IMAGE_NAME 50 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ENV_BASE_IMAGE_NAME'] ] 51 | - name: DISPLAY_BASE_NAME 52 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.DISPLAY_BASE_NAME'] ] 53 | - name: CONDA_PATH 54 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CONDA_PATH'] ] 55 | - name: CLUSTER_SIZE 56 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_SIZE'] ] 57 | - name: CLUSTER_REGION 58 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_REGION'] ] 59 | - name: CLUSTER_NAME 60 | value: $[ 
dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_NAME'] ] 61 | - name: AZURE_RM_SVC_CONNECTION 62 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.AZURE_RM_SVC_CONNECTION'] ] 63 | - name: MODEL_BASE_NAME 64 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.MODEL_BASE_NAME'] ] 65 | - name: REGISTRY_NAME 66 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.REGISTRY_NAME'] ] 67 | - name: RESOURCE_GROUP_NAME 68 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.RESOURCE_GROUP_NAME'] ] 69 | - name: SCORE_FILE_NAME 70 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.SCORE_FILE_NAME'] ] 71 | - name: WORKSPACE_NAME 72 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.WORKSPACE_NAME'] ] 73 | - name: EXPERIMENT_NAME 74 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.EXPERIMENT_NAME'] ] 75 | - name: DISPLAY_NAME 76 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.DISPLAY_NAME'] ] 77 | - name: MODEL_NAME 78 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.MODEL_NAME'] ] 79 | - name: HEALTHCHECK_TIMEOUT 80 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.HEALTHCHECK_TIMEOUT'] ] 81 | - name: AML_ARTIFACT_PATH 82 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.AML_ARTIFACT_PATH'] ] 83 | steps: 84 | - script: | 85 | if [ -z "$(ML_MODEL_CONFIG_NAME)" ] 86 | then 87 | echo "variables are not available. Check parameter values or config json file for valid values.." 88 | exit 1 89 | else 90 | echo "variables were loaded from config file.." 
"""Reusable logger and AML client factory for model_factory."""
import logging
import os
import sys


def get_logger(name: str = "dataops", level: int = logging.DEBUG) -> logging.Logger:
    """Return a named logger that writes timestamped records to stdout.

    Args:
        name (str, optional): Logger name. Defaults to "dataops".
        level (int, optional): Log level. Defaults to logging.DEBUG.

    Returns:
        logging.Logger: named logger.
    """
    logger = logging.getLogger(name)
    # Configure only once per named logger. Checking this logger's OWN
    # handler list (instead of hasHandlers(), which also walks ancestor
    # loggers) guarantees the stream handler and level are applied even
    # when the root logger is already configured (e.g. under pytest).
    if logger.handlers:
        return logger

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    logger.setLevel(level)
    logger.addHandler(handler)
    return logger


def get_aml_client(
    client_id: str,
    client_secret: str,
    tenant_id: str,
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
):
    """Create an MLClient authenticated via an EnvironmentCredential.

    The service-principal values are exported into the process
    environment because EnvironmentCredential reads its credentials
    from AZURE_CLIENT_ID / AZURE_CLIENT_SECRET / AZURE_TENANT_ID.

    Args:
        client_id (str): service principal client id
        client_secret (str): service principal secret
        tenant_id (str): Azure AD tenant id
        subscription_id (str): Azure subscription id
        resource_group_name (str): resource group holding the workspace
        workspace_name (str): AML workspace name

    Returns:
        MLClient: client scoped to the given workspace.

    Raises:
        Exception: re-raised after logging if client creation fails.
    """
    # Imported lazily so this module can be imported (e.g. for get_logger)
    # without the azure-* packages installed.
    from azure.identity import EnvironmentCredential
    from azure.ai.ml import MLClient

    try:
        os.environ["AZURE_CLIENT_ID"] = client_id
        os.environ["AZURE_CLIENT_SECRET"] = client_secret
        os.environ["AZURE_TENANT_ID"] = tenant_id

        credential = EnvironmentCredential()
        # MLClient raises on failure rather than returning None, so no
        # post-construction None check is needed.
        return MLClient(
            credential,
            subscription_id=subscription_id,
            resource_group_name=resource_group_name,
            workspace_name=workspace_name,
        )
    except Exception as ex:
        print(f"Exception while creating MLClient: {ex}")
        raise
"""Return (and lazily create) an AML compute cluster."""
import argparse
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import AmlCompute
from azure.core.exceptions import ResourceNotFoundError
from common.logging.logger import get_logger

logger = get_logger("common_mlops")


def get_compute(
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
    cluster_name: str,
    cluster_size: str,
    cluster_region: str,
    min_instances: int,
    max_instances: int,
    idle_time_before_scale_down: int,
):
    """Return an existing AML compute cluster, creating it if absent.

    Args:
        subscription_id (str): subscription id
        resource_group_name (str): resource group name
        workspace_name (str): workspace name
        cluster_name (str): cluster name
        cluster_size (str): cluster size (VM SKU)
        cluster_region (str): cluster region
        min_instances (int): min instances
        max_instances (int): max instances
        idle_time_before_scale_down (int): idle seconds before scale down

    Returns:
        _type_: AML Compute

    Raises:
        Exception: re-raised after logging when the cluster can neither
            be fetched nor created.
    """
    compute_object = None
    try:
        client = MLClient(
            DefaultAzureCredential(),
            subscription_id=subscription_id,
            resource_group_name=resource_group_name,
            workspace_name=workspace_name,
        )
        compute_object = client.compute.get(cluster_name)
        logger.info(f"Found existing compute target {cluster_name}, so using it.")
    except ResourceNotFoundError:
        logger.info(f"{cluster_name} is not found! Trying to create a new one.")
        # Build the desired cluster specification, then submit it;
        # .result() blocks until provisioning completes.
        compute_spec = AmlCompute(
            name=cluster_name,
            type="amlcompute",
            size=cluster_size,
            location=cluster_region,
            min_instances=min_instances,
            max_instances=max_instances,
            idle_time_before_scale_down=idle_time_before_scale_down,
        )
        compute_object = client.compute.begin_create_or_update(
            compute_spec
        ).result()
        logger.info(f"A new cluster {cluster_name} has been created.")
    except Exception:
        # logger.exception already appends the traceback; plain string,
        # no f-string placeholders needed.
        logger.exception("Not able to access compute")
        raise
    return compute_object


def main():
    """Parse CLI arguments and fetch/create the AML compute cluster."""
    parser = argparse.ArgumentParser("get_compute")
    parser.add_argument("--subscription_id", type=str, help="Azure subscription id")
    parser.add_argument(
        "--resource_group_name", type=str, help="Azure Machine learning resource group"
    )
    parser.add_argument(
        "--workspace_name", type=str, help="Azure Machine learning Workspace name"
    )
    parser.add_argument(
        "--cluster_name", type=str, help="Azure Machine learning cluster name"
    )
    parser.add_argument(
        "--cluster_size", type=str, help="Azure Machine learning cluster size"
    )
    parser.add_argument(
        "--cluster_region", type=str, help="Azure Machine learning cluster region"
    )
    parser.add_argument("--min_instances", type=int, default=0)
    parser.add_argument("--max_instances", type=int, default=4)
    parser.add_argument("--idle_time_before_scale_down", type=int, default=120)

    args = parser.parse_args()
    get_compute(
        args.subscription_id,
        args.resource_group_name,
        args.workspace_name,
        args.cluster_name,
        args.cluster_size,
        args.cluster_region,
        args.min_instances,
        args.max_instances,
        args.idle_time_before_scale_down,
    )


if __name__ == "__main__":
    main()
"""Create or update an AML workspace environment."""
import argparse
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Environment
from common.logging.logger import get_logger

logger = get_logger("common_mlops")


def get_environment(
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
    env_base_image_name: str,
    conda_path: str,
    environment_name: str,
    description: str,
):
    """Create or update a conda-based AML environment.

    Args:
        subscription_id (str): subscription id
        resource_group_name (str): resource group name
        workspace_name (str): workspace name
        env_base_image_name (str): base docker image for the environment
        conda_path (str): path to the conda requirements file
        environment_name (str): environment name
        description (str): environment description

    Returns:
        _type_: workspace environment

    Raises:
        Exception: re-raised after logging when create/update fails.
    """
    try:
        logger.info(f"Checking {environment_name} environment.")
        client = MLClient(
            DefaultAzureCredential(),
            subscription_id=subscription_id,
            resource_group_name=resource_group_name,
            workspace_name=workspace_name,
        )
        env_docker_conda = Environment(
            image=env_base_image_name,
            conda_file=conda_path,
            name=environment_name,
            description=description,
        )
        # create_or_update is idempotent: it registers a new version when
        # the definition changed, otherwise returns the existing one.
        environment = client.environments.create_or_update(env_docker_conda)
        logger.info(f"Environment {environment_name} has been created or updated.")
        return environment

    except Exception:
        # logger.exception captures the traceback; no f-string needed.
        logger.exception("Not able to get environment")
        raise


def main():
    """Parse CLI arguments and create/update the AML environment."""
    parser = argparse.ArgumentParser("prepare_environment")
    parser.add_argument("--subscription_id", type=str, help="Azure subscription id")
    parser.add_argument(
        "--resource_group_name", type=str, help="Azure Machine learning resource group"
    )
    parser.add_argument(
        "--workspace_name", type=str, help="Azure Machine learning Workspace name"
    )
    parser.add_argument(
        "--env_base_image_name", type=str, help="Environment custom base image name"
    )
    parser.add_argument(
        "--conda_path", type=str, help="path to conda requirements file"
    )
    parser.add_argument(
        "--environment_name", type=str, help="Azure Machine learning environment name"
    )
    parser.add_argument(
        "--description", type=str, default="Environment created using Conda."
    )
    args = parser.parse_args()

    get_environment(
        args.subscription_id,
        args.resource_group_name,
        args.workspace_name,
        args.env_base_image_name,
        args.conda_path,
        args.environment_name,
        args.description,
    )


if __name__ == "__main__":
    main()
SHELL:=/bin/bash

# Location of the development virtual environment.
VENV:=/tmp/fridge_env

# NOTE: make runs each recipe line in a separate shell, so sourcing
# `activate` on its own line has no effect on the lines that follow —
# the original recipe installed packages into the system interpreter.
# Calling the venv's own executables directly avoids that pitfall.
setup:
	python -m venv $(VENV)
	$(VENV)/bin/python -m pip install --upgrade pip
	$(VENV)/bin/python -m pip install -r ./devops/pipelines/requirements/build_validation_requirements.txt

# Lint the project with the flake8 installed inside the venv.
lint: setup
	$(VENV)/bin/flake8 .
-------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/__init__.py: -------------------------------------------------------------------------------- 1 | """The model factory for fridge object detection use case.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/config/model_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "models": [ 3 | { 4 | "ML_MODEL_CONFIG_NAME": "fridge_obj_det", 5 | "ENV_NAME": "dev", 6 | "AZURE_RM_SVC_CONNECTION": "mfdemosc", 7 | "CLUSTER_NAME": "dev-pipeline", 8 | "CLUSTER_REGION": "eastus", 9 | "CLUSTER_SIZE": "STANDARD_DS3_V2", 10 | "CONDA_PATH": "fridge_obj_det/mlops/environment/conda.yml", 11 | "DISPLAY_BASE_NAME": "mlops", 12 | "ENV_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04", 13 | "ENVIRONMENT_NAME": "automl", 14 | "EXPERIMENT_BASE_NAME": "automl-fridge-objects-detection-experiment", 15 | "HOST_PORT_NUMBER": "8081", 16 | "KEYVAULT_NAME": "mfdemoaml7626959246", 17 | "ML_SCOPE_SVC_CONNECTION": "mlops_scoped", 18 | "MODEL_BASE_NAME": "fasterrcnn_resnet18_fpn", 19 | "REGISTRY_NAME": "mfdemoacr", 20 | "RESOURCE_GROUP_NAME": "mfdemorg", 21 | "SCORE_FILE_NAME": "score.py", 22 | "WORKSPACE_NAME": "mfdemoaml", 23 | "AML_ARTIFACT_PATH": "INPUT_onnx_model_artifacts_folder/train_artifacts" 24 | }, 25 | { 26 | "ML_MODEL_CONFIG_NAME": "fridge_obj_det", 27 | "ENV_NAME": "prod", 28 | "AZURE_RM_SVC_CONNECTION": "mfdemosc", 29 | "CLUSTER_NAME": "dev-pipeline", 30 | "CLUSTER_REGION": "eastus", 31 | "CLUSTER_SIZE": "STANDARD_DS3_V2", 32 | "CONDA_PATH": "fridge_obj_det/mlops/environment/conda.yml", 33 | "DISPLAY_BASE_NAME": "mlops", 34 | "ENV_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04", 35 | "ENVIRONMENT_NAME": 
"automl", 36 | "EXPERIMENT_BASE_NAME": "automl-fridge-objects-detection-experiment", 37 | "HOST_PORT_NUMBER": "8081", 38 | "KEYVAULT_NAME": "mfdemoaml7626959246", 39 | "ML_SCOPE_SVC_CONNECTION": "mlops_scoped", 40 | "MODEL_BASE_NAME": "fasterrcnn_resnet18_fpn", 41 | "REGISTRY_NAME": "mfdemoacr", 42 | "RESOURCE_GROUP_NAME": "mfdemorg", 43 | "SCORE_FILE_NAME": "score.py", 44 | "WORKSPACE_NAME": "mfdemoaml", 45 | "AML_ARTIFACT_PATH": "INPUT_onnx_model_artifacts_folder/train_artifacts" 46 | } 47 | ] 48 | } 49 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/build_validation_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: model_type 3 | displayName: "type of model to execute" 4 | 5 | jobs: 6 | - job: Build_Validation_Pipeline 7 | workspace: 8 | clean: outputs | resources | all 9 | steps: 10 | - task: UsePythonVersion@0 11 | displayName: 'Use Python 3.8' 12 | inputs: 13 | versionSpec: '3.8' 14 | addToPath: true 15 | 16 | - script: | 17 | python -m venv env 18 | source env/bin/activate 19 | python -m pip install --upgrade pip 20 | python -m pip install -r model_factory/${{parameters.model_type}}/devops/pipelines/requirements/build_validation_requirements.txt 21 | displayName: "Load Python Dependencies" 22 | 23 | - script: | 24 | source env/bin/activate 25 | flake8 ./model_factory/${{parameters.model_type}} 26 | displayName: "Lint with flake8" 27 | 28 | - script: | 29 | source env/bin/activate 30 | cd model_factory 31 | export PYTHONPATH=. 32 | pytest ./${{parameters.model_type}} --ignore=sandbox/ --junitxml=junit/test-results.xml --cov=. 
--cov-report=xml 33 | displayName: 'Run Unit Tests' 34 | condition: succeededOrFailed() 35 | 36 | - task: PublishTestResults@2 37 | condition: succeededOrFailed() 38 | inputs: 39 | testResultsFiles: '**/test-*.xml' 40 | testRunTitle: 'Publish Test Results for Python $(python.version)' 41 | 42 | - task: PublishCodeCoverageResults@1 43 | inputs: 44 | codeCoverageTool: Cobertura 45 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/fridge_obj_det_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | # pr: 2 | # branches: 3 | # include: 4 | # # - feature/{feature-branch-name} 5 | # # - add feature branches that will build more features on top of frdige-obj-detection 6 | # paths: 7 | # include: 8 | # - model_factory/fridge_obj_det/* 9 | # - model_factory/common/devops/* 10 | 11 | # trigger: 12 | # branches: 13 | # include: 14 | # # - feature/{feature-branch-name} 15 | # # - add feature branches that will build more features on top of frdige-obj-detection 16 | # paths: 17 | # include: 18 | # - model_factory/fridge_obj_det/* 19 | # - model_factory/common/devops/* 20 | 21 | # Replace this section with the above changes, when there is a new feature branch 22 | # that will build on top of fridge-obj-detection 23 | pr: none 24 | trigger: none 25 | 26 | parameters: 27 | - name: exec_environment 28 | displayName: "Execution Environment" 29 | default: "dev" 30 | - name: model_type 31 | displayName: "type of model to execute" 32 | default: "fridge_obj_det" 33 | 34 | variables: 35 | - group: mlops_platform_${{parameters.exec_environment}}_vg 36 | - name: PIPELINE_TYPE 37 | value: ${{parameters.model_type}} 38 | 39 | stages: 40 | - template: 
${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/variables_template.yml 41 | parameters: 42 | exec_environment: ${{parameters.exec_environment}} 43 | model_type: ${{parameters.model_type}} 44 | 45 | - stage: build_validation 46 | displayName: build_validation 47 | dependsOn: 48 | - variable_generation 49 | variables: 50 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 51 | jobs: 52 | - template: build_validation_pipeline.yml 53 | parameters: 54 | model_type: ${{ parameters.model_type }} 55 | 56 | - ${{ if ne(variables['Build.Reason'], 'PullRequest') }}: 57 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/platform_dev_pipeline.yml 58 | parameters: 59 | exec_environment: ${{ parameters.exec_environment }} 60 | model_type: ${{ parameters.model_type }} 61 | 62 | 63 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/fridge_obj_det_main_pipeline.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | none 3 | pr: 4 | branches: 5 | include: 6 | - master 7 | paths: 8 | include: 9 | - model_factory/fridge_obj_det/* 10 | - model_factory/common/devops/* 11 | 12 | parameters: 13 | - name: exec_environment 14 | displayName: "Execution Environment" 15 | default: "prod" 16 | - name: model_type 17 | displayName: "type of model to execute" 18 | default: "fridge_obj_det" 19 | - name: TARGET_CONDITION 20 | displayName: "Target Condition for the deployment" 21 | default: "tags.device='smoke-test-arm'" 22 | 23 | variables: 24 | - group: mlops_platform_${{parameters.exec_environment}}_vg 25 | - name: PIPELINE_TYPE 26 | value: ${{parameters.model_type}} 27 | 28 | stages: 29 | # Generate Variables 30 | - template: 
${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/variables_template.yml 31 | parameters: 32 | exec_environment: ${{parameters.exec_environment}} 33 | model_type: ${{parameters.model_type}} 34 | 35 | # Run Linting and Unit tests 36 | - stage: build_validation 37 | displayName: build_validation 38 | dependsOn: 39 | - variable_generation 40 | variables: 41 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 42 | jobs: 43 | - template: build_validation_pipeline.yml 44 | parameters: 45 | model_type: ${{ parameters.model_type }} 46 | 47 | - ? ${{ if and(or(eq(variables['Build.Reason'], 'PullRequest'),eq(variables['Build.Reason'], 'Manual')), ne(variables['Build.SourceBranch'], 'refs/heads/master')) }} 48 | : - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/platform_main_pipeline.yml 49 | parameters: 50 | exec_environment: "dev" 51 | model_type: ${{ parameters.model_type }} 52 | TARGET_CONDITION: ${{ parameters.TARGET_CONDITION }} 53 | 54 | - ${{ if eq(variables['Build.SourceBranch'], 'refs/heads/master') }}: 55 | - stage: build_and_push_to_prod 56 | dependsOn: 57 | - variable_generation 58 | - build_validation 59 | variables: 60 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 61 | jobs: 62 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/image_generation_template.yml 63 | parameters: 64 | exec_environment: ${{parameters.exec_environment}} 65 | model_type: ${{parameters.model_type}} 66 | model_name: "${{parameters.model_type}}_prod_master" 67 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/fridge_obj_det_mlops_pipeline.yml: 
-------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | default: "dev" 5 | - name: model_type 6 | displayName: "type of model to execute" 7 | - name: MODEL_NAME 8 | displayName: "model name" 9 | default: $(MODEL_NAME) 10 | steps: 11 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/execute_mlops_pipeline.yml 12 | parameters: 13 | script_parameter: | 14 | python -m ${{ parameters.model_type }}.mlops.src.mlops_pipeline \ 15 | --subscription_id $(SUBSCRIPTION_ID) \ 16 | --resource_group_name $(RESOURCE_GROUP_NAME) \ 17 | --workspace_name $(WORKSPACE_NAME) \ 18 | --cluster_name $(CLUSTER_NAME) \ 19 | --cluster_size $(CLUSTER_SIZE) \ 20 | --cluster_region $(CLUSTER_REGION) \ 21 | --build_reference $(BUILD.BUILDID) \ 22 | --deploy_environment ${{parameters.exec_environment}} \ 23 | --experiment_name $(EXPERIMENT_NAME) \ 24 | --display_name $(DISPLAY_NAME) \ 25 | --wait_for_completion True \ 26 | --environment_name $(ENVIRONMENT_NAME) \ 27 | --env_base_image_name $(ENV_BASE_IMAGE_NAME) \ 28 | --model_name ${{parameters.MODEL_NAME}} \ 29 | --conda_path $(CONDA_PATH) 30 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/requirements/build_validation_requirements.txt: -------------------------------------------------------------------------------- 1 | flake8-docstrings==1.6.0 2 | flake8==6.0.0 3 | pep8-naming==0.13.0 4 | pytest-cov==3.0.0 5 | pytest-azurepipelines==1.0.3 6 | pytest-mock==3.7.0 7 | pytest==7.1.2 8 | azure-ai-ml==1.9.0 9 | azure-identity==1.13.0 10 | onnx==1.14.0 11 | onnxconverter-common==1.13.0 12 | typer==0.9.0 -------------------------------------------------------------------------------- 
/telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/requirements/execute_job_requirements.txt: -------------------------------------------------------------------------------- 1 | azure-cli==2.47.0 2 | azure-ai-ml==1.9.0 3 | azure-identity==1.13.0 4 | azure-keyvault-secrets==4.7.0 5 | requests==2.31.0 6 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/environment/requirements.txt: -------------------------------------------------------------------------------- 1 | azureml-defaults -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/.gitkeep -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/__init__.py: -------------------------------------------------------------------------------- 1 | """Fridge objects AML training pipeline MLOps source.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/compare_map.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: map_comparision 3 | version: 1 4 | display_name: Compare 2 mAP values 5 | type: command 6 | inputs: 7 | map_before: 8 | type: uri_file 9 | map_after: 
10 | type: uri_file 11 | outputs: 12 | metrics_json_file: 13 | type: uri_file 14 | environment: azureml:conda-based-devenv-py38-cpu@latest 15 | code: ../../../ 16 | command: >- 17 | python fridge_obj_det/src/compare_map/compare_map.py 18 | ${{inputs.map_before}} 19 | ${{inputs.map_after}} 20 | ${{outputs.metrics_json_file}} 21 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/convert.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: convert_onnx_fp32_to_fp16 3 | version: 1 4 | display_name: convert ONNX from fp32 to fp16 5 | type: command 6 | inputs: 7 | fp32_input_dir: 8 | type: uri_folder 9 | outputs: 10 | fp16_output_dir: 11 | type: uri_folder 12 | environment: azureml:conda-based-devenv-py38-cpu@latest 13 | code: ../../../ 14 | command: >- 15 | python fridge_obj_det/src/convert/convert_fp32_to_fp16.py 16 | ${{inputs.fp32_input_dir}} 17 | ${{outputs.fp16_output_dir}} 18 | 19 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/prep.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: prepare_fridge_objects_data 3 | display_name: Download, split and register fridge objects dataset as MLTables 4 | version: 1 5 | type: command 6 | inputs: 7 | client_id: 8 | type: string 9 | client_secret: 10 | type: string 11 | tenant_id: 12 | type: string 13 | subscription_id: 14 | type: string 15 | resource_group_name: 16 | type: string 17 | workspace_name: 18 | type: string 19 | fridge_objects_uri_folder_name: 20 | type: string 21 | default: 
fridge-items-images-object-detection 22 | train_mltable_name: 23 | type: string 24 | val_mltable_name: 25 | type: string 26 | test_mltable_name: 27 | type: string 28 | outputs: 29 | train_mltable: 30 | type: mltable 31 | val_mltable: 32 | type: mltable 33 | test_mltable: 34 | type: mltable 35 | code: ../../../ 36 | environment: azureml:conda-based-devenv-py38-cpu@latest 37 | command: >- 38 | python -m fridge_obj_det.src.prep.prep 39 | --client_id ${{inputs.client_id}} 40 | --client_secret ${{inputs.client_secret}} 41 | --tenant_id ${{inputs.tenant_id}} 42 | --subscription_id ${{inputs.subscription_id}} 43 | --resource_group_name ${{inputs.resource_group_name}} 44 | --workspace_name ${{inputs.workspace_name}} 45 | --fridge_objects_uri_folder_name ${{inputs.fridge_objects_uri_folder_name}} 46 | --train_mltable_name ${{inputs.train_mltable_name}} 47 | --val_mltable_name ${{inputs.val_mltable_name}} 48 | --test_mltable_name ${{inputs.test_mltable_name}} 49 | --train_mltable ${{outputs.train_mltable}} 50 | --val_mltable ${{outputs.val_mltable}} 51 | --test_mltable ${{outputs.test_mltable}} 52 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/register.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: register_onnx_model 3 | version: 1 4 | display_name: Register an ONNX model as a custom model in the AML workspace. 
5 | type: command 6 | inputs: 7 | client_id: 8 | type: string 9 | client_secret: 10 | type: string 11 | tenant_id: 12 | type: string 13 | subscription_id: 14 | type: string 15 | resource_group_name: 16 | type: string 17 | workspace_name: 18 | type: string 19 | onnx_model_artifacts_folder: 20 | type: uri_folder 21 | registered_model_name: 22 | type: string 23 | registered_model_description: 24 | type: string 25 | build_reference_id: 26 | type: string 27 | metrics_json_file: 28 | type: uri_file 29 | environment: azureml:conda-based-devenv-py38-cpu@latest 30 | code: ../../../ 31 | command: >- 32 | python -m fridge_obj_det.src.register.register 33 | --client_id ${{inputs.client_id}} 34 | --client_secret ${{inputs.client_secret}} 35 | --tenant_id ${{inputs.tenant_id}} 36 | --subscription_id ${{inputs.subscription_id}} 37 | --resource_group_name ${{inputs.resource_group_name}} 38 | --workspace_name ${{inputs.workspace_name}} 39 | --input_model_artifacts_path ${{inputs.onnx_model_artifacts_folder}} 40 | --registered_model_name ${{inputs.registered_model_name}} 41 | --registered_model_description "${{inputs.registered_model_description}}" 42 | --build_reference ${{inputs.build_reference_id}} 43 | --metrics_json_path ${{inputs.metrics_json_file}} 44 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/score.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: test_map_score 3 | version: 1 4 | display_name: calculate mAP score 5 | type: command 6 | inputs: 7 | model_folder_path: 8 | type: uri_folder 9 | mltable_folder: 10 | type: uri_folder 11 | outputs: 12 | results_file: 13 | type: uri_file 14 | environment: azureml:conda-based-devenv-py38-cpu@latest 15 | code: ../../../ 16 | command: >- 17 | python 
fridge_obj_det/src/score/score.py 18 | ${{inputs.model_folder_path}} 19 | ${{inputs.mltable_folder}} 20 | ${{outputs.results_file}} 21 | 22 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/train.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: train_automl_object_detection_model 3 | display_name: Train AutoML Object Detection Model 4 | version: 1 5 | type: command 6 | inputs: 7 | client_id: 8 | type: string 9 | client_secret: 10 | type: string 11 | tenant_id: 12 | type: string 13 | subscription_id: 14 | type: string 15 | resource_group_name: 16 | type: string 17 | workspace_name: 18 | type: string 19 | training_mltable_path: 20 | type: mltable 21 | validation_mltable_path: 22 | type: mltable 23 | automl_obj_det_model_name: 24 | type: string 25 | default: fasterrcnn_resnet18_fpn 26 | automl_compute_cluster_name: 27 | type: string 28 | automl_experiment_name: 29 | type: string 30 | optional: true 31 | outputs: 32 | model_artifacts_dir: 33 | type: uri_folder 34 | code: ../../../ 35 | environment: azureml:conda-based-devenv-py38-cpu@latest 36 | command: >- 37 | python -m fridge_obj_det.src.train.train 38 | --client_id ${{inputs.client_id}} 39 | --client_secret ${{inputs.client_secret}} 40 | --tenant_id ${{inputs.tenant_id}} 41 | --subscription_id ${{inputs.subscription_id}} 42 | --resource_group_name ${{inputs.resource_group_name}} 43 | --workspace_name ${{inputs.workspace_name}} 44 | --training_mltable_path ${{inputs.training_mltable_path}} 45 | --validation_mltable_path ${{inputs.validation_mltable_path}} 46 | --automl_compute_cluster_name ${{inputs.automl_compute_cluster_name}} 47 | --automl_obj_det_model_name ${{inputs.automl_obj_det_model_name}} 48 | $[[--automl_experiment_name 
"""This script reads conda.yml file and creates an environment from it.

Run python create_devenv.py at this directory to create the environment.
This is meant to be used during development.
"""

from azure.ai.ml import MLClient
from azure.ai.ml.entities import Environment
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential


def _resolve_credential():
    """Return a working Azure credential, preferring the default chain."""
    cred = DefaultAzureCredential()
    try:
        # Probe the credential: raises when no token can be acquired.
        cred.get_token("https://management.azure.com/.default")
        return cred
    except Exception:
        # Default chain failed — fall back to an interactive browser login.
        return InteractiveBrowserCredential()


# Environment spec: base Docker image plus the local conda.yml definition.
dev_environment = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
    conda_file="conda.yml",
    name="conda-based-devenv-py38-cpu",
    description="Environment created from a Docker image plus Conda environment.",
)

ml_client = MLClient.from_config(credential=_resolve_credential())
ml_client.environments.create_or_update(dev_environment)
PYENV_ROOT="/root/.pyenv" 13 | 14 | RUN pyenv install 3.9.12 &&\ 15 | pyenv global 3.9.12 16 | 17 | ENV POETRY_HOME="/root/.poetry" 18 | ENV PATH="/root/.poetry/bin:${PATH}" 19 | 20 | RUN curl -sSL https://install.python-poetry.org | python3 - 21 | RUN poetry config virtualenvs.prefer-active-python true 22 | ENV PATH="/root/.pyenv/versions/3.9.12/bin:${PATH}" 23 | 24 | # install packages 25 | COPY pyproject.toml poetry.lock .python-version /app/ 26 | WORKDIR /app 27 | RUN poetry config installer.max-workers 10 &&\ 28 | poetry install --no-interaction --no-ansi -vvv 29 | 30 | # Setup env for the the container 31 | ENV AZUREML_MODEL_DIR=/app/azureml-models 32 | ENV MODEL_LOG_PATH=/app/logs/ 33 | ENV GUNICORN_LOG_LEVEL=debug 34 | 35 | # Create dirs 36 | RUN mkdir -p $AZUREML_MODEL_DIR &&\ 37 | mkdir -p $MODEL_LOG_PATH 38 | 39 | # Copy scoring files 40 | COPY scoring /app/scoring 41 | COPY server /app/server 42 | 43 | # Copy Model 44 | COPY model_artifacts/* /app/azureml-models/ 45 | 46 | EXPOSE 8080 47 | 48 | CMD poetry run gunicorn --timeout 600 -b=0.0.0.0:8080 --capture-output --log-level ${GUNICORN_LOG_LEVEL} server.__main__:app -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/__init__.py: -------------------------------------------------------------------------------- 1 | """TODO.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/model_artifacts/labels.json: -------------------------------------------------------------------------------- 1 | ["--bg--", "can", "carton", "milk_bottle", "water_bottle"] -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/model_artifacts/test_sample.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/model_artifacts/test_sample.jpg -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fridge_obj_det", 3 | "scripts": { 4 | "version": "semantic-release" 5 | } 6 | } -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "fridge-obj-det-model" 3 | version = "0.1.0" 4 | description = "Model for detecting objects that can be placed in a fridge" 5 | authors = ["Dev "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "3.9.12" 10 | onnxruntime = "1.14.1" 11 | numpy = "1.24.2" 12 | pillow = "10.0.1" 13 | azureml-core = "1.49.0" 14 | flask = "^2.3.2" 15 | gunicorn = "20.1.0" 16 | 17 | [build-system] 18 | requires = ["poetry-core"] 19 | build-backend = "poetry.core.masonry.api" 20 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/scoring/__init__.py: -------------------------------------------------------------------------------- 1 | """TODO model package docstring.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/scoring/predict.py: 
"""Batch object-detection inference helpers for the fridge ONNX model."""
import onnxruntime
from typing import Dict, List
import logging


def get_batch_predictions_from_onnx(
    onnx_session: onnxruntime.InferenceSession,
    img_data_batch,
    model_img_width: int,
    model_img_height: int,
    object_class_names: List[str],
    score_threshold: float = 0.8,
) -> List[List[Dict]]:
    """Perform predictions with ONNX runtime for a batch of images.

    Returns a list for each image in img_data_batch, where each list per image
    is a list of bounding box predictions (dict) with the following structure:

    [ # list length of img_data_batch
        [ # per image list of bounding box predictions
            {
                'box': {
                    'topX': normalised top left bounding box X co-ordinate
                    'topY': normalised top left bounding box Y co-ordinate
                    'bottomX': normalised bottom right bounding box X co-ordinate
                    'bottomY': normalised bottom right bounding box Y co-ordinate
                },
                'label': bounding box class name,
                'score': bounding box confidence score
            }
        ]
    ]

    Note that bounding box co-ordinates are normalised to the range [0, 1] to allow
    scaling to the original image size (as the original image may have been resized for
    model prediction).

    Args:
        onnx_session (onnxruntime.InferenceSession): the ONNX runtime inference session
            with model loaded.
        img_data_batch (List[ndarray]): pre-processed list of images ready for prediction,
            each image should have shape CxHxW.
        model_img_width (int): ONNX model image width
        model_img_height (int): ONNX model image height
        object_class_names (List[str]): Ordered list of object class names, will map model
            prediction indices to this list to get predicted object class names.
        score_threshold (float): confidence score threshold to filter predictions.
            Defaults to 0.8.

    Returns:
        (List[List[Dict]]): List of bounding box predictions per image in
            img_data_batch.
    """
    sess_input = onnx_session.get_inputs()
    sess_output = onnx_session.get_outputs()

    output_names = [output.name for output in sess_output]

    batch_predictions = []
    for img_data in img_data_batch:
        try:
            # The model expects a batched input, hence the [img_data] wrapper.
            predictions = onnx_session.run(
                output_names=output_names, input_feed={sess_input[0].name: [img_data]}
            )
            batch_predictions.append(predictions)
        except Exception as error:
            # FIX: was `except BaseException`, which also intercepts
            # KeyboardInterrupt/SystemExit; narrowed and still re-raised.
            logging.error(
                "Error while running predictions using onnxruntime", exc_info=error
            )
            raise

    logging.info(
        "batch predictions completed, no. of predictions: %s", len(batch_predictions)
    )
    # Filter the results with threshold.
    filtered_boxes_batch = []
    for batch_sample in batch_predictions:
        # in case of retinanet change the order of boxes, labels, scores to
        # boxes, scores, labels; confirm the same from order of output_names
        boxes, labels, scores = batch_sample[0], batch_sample[1], batch_sample[2]
        bounding_boxes = _get_prediction(
            boxes,
            labels,
            scores,
            (model_img_height, model_img_width),
            object_class_names,
        )
        filtered_bounding_boxes = [
            box for box in bounding_boxes if box["score"] >= score_threshold
        ]
        filtered_boxes_batch.append(filtered_bounding_boxes)
    logging.info("No. of filtered predictions: %s", len(filtered_boxes_batch))
    return filtered_boxes_batch


def _get_box_dims(image_shape, box):
    """Normalise one bounding box's pixel corners to [0, 1] by image size."""
    box_keys = ["topX", "topY", "bottomX", "bottomY"]
    height, width = image_shape[0], image_shape[1]

    box_dims = dict(zip(box_keys, [coordinate.item() for coordinate in box]))

    box_dims["topX"] = box_dims["topX"] * 1.0 / width
    box_dims["bottomX"] = box_dims["bottomX"] * 1.0 / width
    box_dims["topY"] = box_dims["topY"] * 1.0 / height
    box_dims["bottomY"] = box_dims["bottomY"] * 1.0 / height
    return box_dims


def _get_prediction(boxes, labels, scores, image_shape, classes):
    """Combine raw boxes/labels/scores arrays into per-box prediction dicts."""
    bounding_boxes = []
    for box, label_index, score in zip(boxes, labels, scores):
        box_dims = _get_box_dims(image_shape, box)

        box_record = {
            "box": box_dims,
            "label": classes[label_index],
            "score": score.item(),
        }

        bounding_boxes.append(box_record)

    return bounding_boxes
"""Image pre-processing helpers for ONNX model inference."""
from typing import List
from PIL import Image
import numpy as np
import io


def preprocess_image_for_prediction(image: Image, height_onnx: int, width_onnx: int):
    """Perform pre-processing on raw input image.

    Transform, resize and normalize the image for expected Faster-RCNN ONNX prediction.

    Args:
        image (Image): PIL.Image loaded image.
        height_onnx (int): ONNX model expected image height.
        width_onnx (int): ONNX model expected image width.

    Returns:
        ndarray: Pre-processed image in numpy format, shape: 1xCxHxW
    """
    image = image.convert("RGB")
    image = image.resize((width_onnx, height_onnx))
    np_image = np.array(image)
    # HWC -> CHW
    np_image = np_image.transpose(2, 0, 1)  # CxHxW
    # normalize the image with per-channel ImageNet statistics
    mean_vec = np.array([0.485, 0.456, 0.406])
    std_vec = np.array([0.229, 0.224, 0.225])
    norm_img_data = np.zeros(np_image.shape).astype("float32")
    for i in range(np_image.shape[0]):
        norm_img_data[i, :, :] = (np_image[i, :, :] / 255 - mean_vec[i]) / std_vec[i]
    np_image = np.expand_dims(norm_img_data, axis=0)  # 1xCxHxW
    return np_image


def prepare_image_prediction_batch(
    batch_image_files: List[bytes],
    model_img_width: int,
    model_img_height: int,
    batch_size: int,
) -> List:
    """Pre-process a list of raw image payloads for inference.

    Transform list of images (of batch_size) into a batch that is ready to be passed into
    model prediction. The result of this function can be passed to
    get_batch_predictions_from_onnx() for inference results on each image.

    Args:
        batch_image_files (List[bytes]): raw image file contents (e.g. decoded from
            base64), length must be same as batch_size. NOTE: these are image bytes,
            not local file paths — they are opened via io.BytesIO.
        model_img_width (int): expected ONNX model input image width
        model_img_height (int): expected ONNX model input image height
        batch_size (int): batch size of images to prepare, should be equal
            to len(batch_image_files)

    Returns:
        ndarray: pre-processed image batch with leading dimension == batch_size

    Raises:
        ValueError: if batch_size is not positive or does not match
            len(batch_image_files). (FIX: the original fell through to
            `None.shape` on an empty batch and relied on `assert`, which is
            stripped under `python -O`.)
    """
    if batch_size <= 0 or batch_size != len(batch_image_files):
        raise ValueError(
            f"batch_size ({batch_size}) must be positive and equal to the number "
            f"of images provided ({len(batch_image_files)})"
        )

    img_processed_list = []
    for i in range(batch_size):
        img = Image.open(io.BytesIO(batch_image_files[i]))
        img_processed_list.append(
            preprocess_image_for_prediction(img, model_img_height, model_img_width)
        )

    if len(img_processed_list) > 1:
        img_data = np.concatenate(img_processed_list)
    else:
        img_data = img_processed_list[0]

    return img_data
"""AML-style scoring entry points (init/run) for the fridge object detection model."""
from typing import List, Tuple
import logging
import onnxruntime
import json
import os
import base64
from scoring.prepare import prepare_image_prediction_batch
from scoring.predict import get_batch_predictions_from_onnx


def _load_onnx_session(
    class_labels_json, onnx_model_path
) -> Tuple[onnxruntime.InferenceSession, List[str]]:
    """Load model ONNX inference session.

    Load ONNX model and associated class labels name list to associate model
    prediction indices into an ONNX inference session.

    Args:
        class_labels_json (str): path to local object class labels JSON file
        onnx_model_path (str): onnx model binary path

    Returns:
        Tuple[onnxruntime.InferenceSession, List[str]]: a tuple of the loaded
            ONNX inference session and the class labels list
    """
    with open(class_labels_json) as f:
        class_names = json.load(f)
    session = onnxruntime.InferenceSession(
        onnx_model_path,
        providers=['CPUExecutionProvider']
    )
    return session, class_names


def get_onnx_model_img_dims(
    onnx_session: onnxruntime.InferenceSession,
) -> Tuple[int, int]:
    """For a loaded ONNX model, get the expected image width and height to correctly perform inference.

    Args:
        onnx_session (onnxruntime.InferenceSession): loaded ONNX model

    Returns:
        Tuple[int, int]: (ONNX model image width, ONNX model image height)
    """
    # Input shape is NCHW; batch and channel dims are not needed here.
    _, _, height_onnx, width_onnx = onnx_session.get_inputs()[0].shape
    return width_onnx, height_onnx


def init():
    """Load the ONNX model and labels into module-level state for `run()`.

    Reads model artifacts from the AZUREML_MODEL_DIR directory and caches the
    inference session, class names and model input dimensions in the
    `inference_variables` global dict.
    """
    logging.info("Init started")
    classes_json_file_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "labels.json")
    onnx_file_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "model.onnx")
    onnx_session, class_names = _load_onnx_session(classes_json_file_path, onnx_file_path)
    width_onnx, height_onnx = get_onnx_model_img_dims(onnx_session)

    logging.info("Loaded models in the memory")

    # Cached globally because the AML scoring contract calls init() once and
    # run() many times.
    global inference_variables
    inference_variables = {}
    inference_variables["onnx_session"] = onnx_session
    inference_variables["class_names"] = class_names
    inference_variables["width_onnx"] = width_onnx
    inference_variables["height_onnx"] = height_onnx
    logging.info("Init complete")


def run(raw_data):
    """Score a JSON request of base64-encoded images.

    Args:
        raw_data (str | bytes): JSON payload of the form
            {"images": ["<base64 image>", ...]}.

    Returns:
        List[List[Dict]]: per-image bounding-box predictions from
            get_batch_predictions_from_onnx().
    """
    # convert base64 string to images
    logging.info("Received a request to images")

    request = json.loads(raw_data)
    prediction_image_bytes = []
    for encoded_image_data in request["images"]:
        imgdata = base64.b64decode(encoded_image_data)
        prediction_image_bytes.append(imgdata)

    batch_size = len(prediction_image_bytes)
    # FIX: use lazy %-style args instead of eager str.format for consistency
    # with the logging module conventions.
    logging.info("Request contains %s image(s) for inference", batch_size)

    # Prepare the batch of images to send to ONNX model for prediction
    predictions_img_batch = prepare_image_prediction_batch(
        batch_image_files=prediction_image_bytes,
        model_img_width=inference_variables["width_onnx"],
        model_img_height=inference_variables["height_onnx"],
        batch_size=batch_size,
    )
    logging.info("Prepared the batch of images")

    # Get the model object predictions for each image in the batch
    bbox_predictions = get_batch_predictions_from_onnx(
        onnx_session=inference_variables["onnx_session"],
        img_data_batch=predictions_img_batch,
        model_img_width=inference_variables["width_onnx"],
        model_img_height=inference_variables["height_onnx"],
        object_class_names=inference_variables["class_names"],
        score_threshold=0.8,
    )
    # FIX: original passed a "{}" placeholder with %-style lazy args, so the
    # results were never substituted and logging raised a formatting error.
    logging.info("Done with the prediction: Results are %s", bbox_predictions)

    return bbox_predictions
15 | 16 | Returns: 17 | - (JSON): {"status": "OK"} 18 | """ 19 | return jsonify({"status": "OK"}) 20 | 21 | 22 | @app.route('/score', methods=['POST']) 23 | def score(): 24 | """ 25 | Define an API endpoint to score input data. 26 | 27 | Parameters: 28 | - raw_data (bytes): The raw input data to score, sent in the request body. 29 | 30 | Returns: 31 | - (JSON): The scoring results from the `run()` function. 32 | """ 33 | return run(raw_data=request.data) 34 | 35 | 36 | if __name__ == '__main__': 37 | app.run(host='0.0.0.0', port=5001) 38 | else: 39 | gunicorn_logger = logging.getLogger('gunicorn.error') 40 | app.logger.handlers = gunicorn_logger.handlers 41 | app.logger.setLevel(gunicorn_logger.level) 42 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/src/__init__.py: -------------------------------------------------------------------------------- 1 | """Fridge objects training pipeline src code.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/src/compare_map/__init__.py: -------------------------------------------------------------------------------- 1 | """TODO fill in.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/src/compare_map/compare_map.py: -------------------------------------------------------------------------------- 1 | """Convert all onnx models in input_dir and downcast the weights to fp16.""" 2 | from warnings import warn 3 | from decimal import Decimal 4 | 5 | import json 6 | import typer 7 | from pathlib import Path 8 | 9 | 10 | def extract_map( 11 | results_file: Path 12 | ) -> float: 13 | """Extract mAP@0.5 from results.json file. 
"""Compare mAP metrics before and after ONNX fp16 conversion.

FIX: the original module docstring was copy-pasted from the fp16 converter
("Convert all onnx models...") and did not describe this module.
"""
from warnings import warn
from decimal import Decimal

import json
from pathlib import Path


def extract_map(
    results_file: Path
) -> float:
    """Extract mAP@0.5 from results.json file.

    Args:
        results_file (Path): Path to results file containing
            results.json file with mAP metrics.

    Returns:
        float: mAP@0.5 score
    """
    with open(results_file, 'r') as f:
        results = json.load(f)

    map_score = results['map_50']
    return map_score


def create_metrics_json(
    map_before: float,
    map_after: float,
    metrics_json_file: Path
):
    """Create metrics json file with mAP scores.

    Args:
        map_before (float): fp32 mAP score.
        map_after (float): fp16 mAP score.
        metrics_json_file (Path): Path to metrics json file for writing.
    """
    map_dict = {
        "map_onnx_fp16": map_after,
        "map_onnx_fp32": map_before}
    json_content = json.dumps(map_dict)
    with open(metrics_json_file, "w") as f:
        f.write(json_content)


def compare_scores(
    map_before: float,
    map_after: float,
    tolerance: float = 0.01,
    throws_error: bool = False,
):
    """Compare mAP before and after onnx fp16 conversion and raise ValueError or print warning if mAP drop is beyond tolerance.

    Args:
        map_before (float): mAP metric before onnx fp16 conversion.
        map_after (float): mAP metric after onnx fp16 conversion.
        tolerance (float, optional): threshold to tolerate mAP value drop.
            map_before - map_after <= tolerance will be considered as acceptable.
            Defaults to 0.01.
        throws_error (bool, optional): whether to throw error when mAP drop is
            beyond tolerance. When this is off, it will print warning instead.
            Defaults to False.

    Raises:
        ValueError: raised when throws_error is True and mAP dropped beyond tolerance
    """
    # without Decimal,
    # >>> a = 0.98
    # >>> b = 0.97
    # >>> a-b
    # 0.010000000000000009
    # this causes a - b > 0.01 to be True:
    # >>> float(Decimal(str(a)) - Decimal(str(b))) == 0.01
    # True
    map_before = Decimal(str(map_before))
    map_after = Decimal(str(map_after))
    if float(map_before - map_after) > tolerance:
        if throws_error:
            raise ValueError(
                f"mAP dropped {map_before - map_after} beyond tolerance {tolerance}."
                f" mAP before conversion: {map_before},"
                f" mAP after conversion: {map_after}"
            )
        else:
            warn(
                f"mAP dropped {map_before - map_after} beyond tolerance {tolerance}."
                f" mAP before conversion: {map_before},"
                f" mAP after conversion: {map_after}"
            )
    if 0 <= float(map_before - map_after) <= tolerance:
        print(
            f"mAP dropped {map_before - map_after} within tolerance {tolerance}."
            f" mAP before conversion: {map_before}, mAP after conversion: {map_after}"
        )
    if float(map_before - map_after) < 0:
        print(
            f"mAP increased {abs(map_before - map_after)}."
            f" mAP before conversion: {map_before}, mAP after conversion: {map_after}"
        )


def compare_map_before_and_after_conversion(
    fp32_results_file: Path,
    fp16_results_file: Path,
    metrics_json_file: Path,
    tolerance: float = 0.01,
    throws_error: bool = False,
):
    """Compare mAP before and after onnx fp16 conversion and raise ValueError or print warning if mAP drop is beyond tolerance.

    Reads metrics files from scoring steps and extracts mAP@0.5 before comparing
    them. Raises an error/prints a warning if the mAP drop is beyond tolerance,
    and writes a json file with these metrics.

    Args:
        fp32_results_file (Path): mAP metrics before onnx fp16 conversion
        fp16_results_file (Path): mAP metrics after onnx fp16 conversion
        metrics_json_file (Path): Path to metrics json file for writing.
        tolerance (float, optional): threshold to tolerate mAP value drop.
            map_before - map_after <= tolerance will be considered as acceptable.
            Defaults to 0.01.
        throws_error (bool, optional): whether to throw error when mAP drop is
            beyond tolerance. When this is off, it will print warning instead.
            Defaults to False.

    Raises:
        ValueError: raised when throws_error is True and mAP dropped beyond tolerance
    """
    map_before = extract_map(fp32_results_file)
    map_after = extract_map(fp16_results_file)

    compare_scores(map_before, map_after, tolerance, throws_error)

    create_metrics_json(map_before, map_after, metrics_json_file)


if __name__ == "__main__":
    # FIX: import the CLI dependency lazily so the module can be imported
    # (e.g. by the unit tests) without typer installed.
    import typer

    typer.run(compare_map_before_and_after_conversion)
2 | 3 | this step will eventually be integrated into one AML pipeline that is under development 4 | This is currently for testing purpose for individual AML component 5 | """ 6 | from azure.ai.ml import MLClient, load_component 7 | from azure.ai.ml.dsl import pipeline 8 | from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential 9 | 10 | compare_map_component = load_component(source="mlops/components/compare_map.yml") 11 | 12 | 13 | @pipeline( 14 | default_compute="dev-pipeline", 15 | ) 16 | def compare_map_pipeline(map_before, map_after): 17 | """Run compare_map component.""" 18 | compare_map_component(map_before=map_before, map_after=map_after) 19 | 20 | 21 | # create a pipeline 22 | # TODO: these input values will be replaced 23 | # with the actual computed values from previous steps 24 | pipeline_job = compare_map_pipeline(map_before=0.98, map_after=0.97) 25 | 26 | try: 27 | credential = DefaultAzureCredential() 28 | # Check if given credential can get token successfully. 
"""Convert all onnx models in input_dir and downcast the weights to fp16."""
from pathlib import Path
from shutil import copyfile
import onnx
from onnxconverter_common import float16
import typer


def convert_fp32_to_fp16(input_dir: Path, output_dir: Path):
    """Convert model.onnx in input_dir and downcast the weights to fp16.

    Also copies over the associated labels.json file.

    Args:
        input_dir (Path): directory that contains fp32 onnx model
        output_dir (Path): directory where downcasted fp16 onnx model is stored
            When this directory doesn't exist, target directory is created

    Raises:
        FileNotFoundError: when the expected model or labels file is missing
            under input_dir/train_artifacts.
    """
    onnx_file = Path(input_dir, 'train_artifacts/model.onnx')
    label_file = Path(input_dir, 'train_artifacts/labels.json')
    # FIX: the original message claimed the directory "had no .onnx files",
    # but only this one specific path is ever checked; say so explicitly.
    if not onnx_file.is_file():
        raise FileNotFoundError(
            f"Expected ONNX model at {onnx_file} but it does not exist."
            " Conversion process is terminated."
        )
    # FIX: fail fast on a missing labels file instead of erroring later
    # inside copyfile with a less obvious message.
    if not label_file.is_file():
        raise FileNotFoundError(
            f"Expected labels file at {label_file} but it does not exist."
            " Conversion process is terminated."
        )

    output_dir = Path(output_dir, "train_artifacts")
    output_dir.mkdir(parents=True, exist_ok=True)

    model = onnx.load(onnx_file)
    print(f"{onnx_file} will be converted to fp16")
    model_fp16 = float16.convert_float_to_float16(model)
    saved_model_path = Path(output_dir, "model.onnx")
    onnx.save(model_fp16, saved_model_path)
    print(
        "Conversion was successful and"
        f" fp16 onnx model was saved at {saved_model_path}"
    )

    # Keep the class labels alongside the converted model.
    fp16_label_file = Path(output_dir, 'labels.json')
    copyfile(label_file, fp16_label_file)


if __name__ == "__main__":
    typer.run(convert_fp32_to_fp16)
2 | 3 | this step will eventually be integrated into one AML pipeline that is under development 4 | This is currently for testing purpose for individual AML component 5 | """ 6 | from azure.ai.ml import Input, MLClient, load_component 7 | from azure.ai.ml.dsl import pipeline 8 | from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential 9 | 10 | onnx_test_ds = Input( 11 | path="azureml://datastores/workspaceblobstore/paths/onnx_test_fp32" 12 | ) 13 | 14 | convert_component = load_component(source="mlops/components/convert.yml") 15 | 16 | 17 | @pipeline( 18 | default_compute="dev-pipeline", 19 | ) 20 | def convert_pipeline(input: Input): 21 | """Run convert component.""" 22 | convert_component(fp32_input_dir=input) 23 | # this output will be used when this is integrated with other components 24 | # convert_node.outputs.fp16_output_dir 25 | 26 | 27 | # create a pipeline 28 | pipeline_job = convert_pipeline(input=onnx_test_ds) 29 | 30 | try: 31 | credential = DefaultAzureCredential() 32 | # Check if given credential can get token successfully. 
"""Pascal VOC to JSONL conversion for object detection annotations."""
import os
from xml.etree import ElementTree
import json


class JSONLConverter:
    """
    Base class for JSONL converters.

    ...
    Attributes
    ---------
    base_url : str
        the base for the image_url to be written into the jsonl file
    """

    def __init__(self, base_url: str):
        """Construct JSONLConverter.

        Args:
            base_url (str): the base for the image_url to be written into the jsonl file.
        """
        self.jsonl_data = []
        self.base_url = base_url

    def convert(self):
        """Inheriters should implement this method.

        Raises:
            NotImplementedError: when called on base class directly.
        """
        raise NotImplementedError


def write_json_lines(converter: JSONLConverter, filename: str):
    """Convert and write a JSONL file.

    Parameters:
        converter (JSONLConverter): the converter use to generate the jsonl
        filename (str): output file for writing jsonl
    """
    json_lines_data = converter.convert()
    with open(filename, "w") as outfile:
        for json_line in json_lines_data:
            json.dump(json_line, outfile, separators=(",", ":"))
            outfile.write("\n")
    print(f"Conversion completed. Converted {len(json_lines_data)} lines.")


class VOCJSONLConverter(JSONLConverter):
    """Class for converting VOC data for object detection into jsonl files."""

    def __init__(self, base_url: str, xml_dir: str):
        """Create VOCJSONLConverter.

        ...
        Attributes
        ---------
        base_url : str
            the base for the image_url to be written into the jsonl file
        xml_dir : str
            directory of xml annotation files
        """
        super().__init__(base_url=base_url)
        self.xml_dir = xml_dir

    def convert(self):
        """Generate jsonl data for object detection or instance segmentation.

        return: list of lines for jsonl
        rtype: List

        """
        for filename in os.listdir(self.xml_dir):
            if not filename.endswith(".xml"):
                # FIX: the original printed a literal placeholder instead of
                # the offending file name.
                print(f"Skipping unknown file: {filename}")
                continue

            annotation_filename = os.path.join(self.xml_dir, filename)
            print(f"Parsing {annotation_filename}")

            root = ElementTree.parse(annotation_filename).getroot()
            width = int(root.find("size/width").text)
            height = int(root.find("size/height").text)

            labels = []
            # `obj` avoids shadowing the builtin `object` (original did).
            for obj in root.findall("object"):
                name = obj.find("name").text
                is_crowd = int(obj.find("difficult").text)

                xmin = obj.find("bndbox/xmin").text
                ymin = obj.find("bndbox/ymin").text
                xmax = obj.find("bndbox/xmax").text
                ymax = obj.find("bndbox/ymax").text

                labels.append(
                    {
                        "label": name,
                        # normalise pixel corners to [0, 1] by image size
                        "topX": float(xmin) / width,
                        "topY": float(ymin) / height,
                        "bottomX": float(xmax) / width,
                        "bottomY": float(ymax) / height,
                        "isCrowd": is_crowd,
                    }
                )

            # build the jsonl record
            image_filename = root.find("filename").text
            _, file_extension = os.path.splitext(image_filename)
            # FIX: the original shallow-copied a shared template dict
            # (`dict(json_line_sample)`), so every record aliased ONE
            # "image_details" dict and all records ended up with the LAST
            # image's format/width/height. Build a fresh record per file.
            json_line = {
                "image_url": os.path.join(self.base_url, image_filename),
                "image_details": {
                    "format": file_extension[1:],
                    "width": width,
                    "height": height,
                },
                "label": labels,
            }

            self.jsonl_data.append(json_line)
        return self.jsonl_data
"""Register the ONNX model to AML workspace."""
import argparse
import json
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes
from common.mlops.get_aml_client import get_aml_client
import logging
import shutil


def main(
    client_id: str,
    client_secret: str,
    tenant_id: str,
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
    input_model_artifacts_path: str,
    registered_model_name: str,
    registered_model_description: str,
    build_reference: str,
    metrics_json_path: str
):
    """Register the ONNX model to the AML workspace.

    Args:
        client_id (str): AAD client ID.
        client_secret (str): AAD client secret.
        tenant_id (str): AAD tenant ID.
        subscription_id (str): AML subscription ID.
        resource_group_name (str): AML resource group name.
        workspace_name (str): AML workspace name.
        input_model_artifacts_path (str): the path to the input model artifacts. Should
            contain the model ONNX file and the labels.json file.
        registered_model_name (str): the name of the registered model in AML.
        registered_model_description (str): the description of the registered model in AML.
        build_reference (str): the AzDO build reference that generated the model.
        metrics_json_path (str): the path to the metrics.json file containing the mAP score
            on the test set for the ONNX FP32 model and the ONNX FP16 model.

    Raises:
        Exception: when an MLClient could not be created from the given credentials.
    """
    # Create ML Client (service-principal auth via the shared helper)
    ml_client = get_aml_client(
        client_id=client_id,
        client_secret=client_secret,
        tenant_id=tenant_id,
        subscription_id=subscription_id,
        resource_group_name=resource_group_name,
        workspace_name=workspace_name,
    )

    if ml_client is None:
        raise Exception("Could not create MLClient")

    print(f"ML Client created successfully {str(ml_client)}")

    # load the metrics file and get the mAP scores
    with open(metrics_json_path, "r") as f:
        metrics = json.load(f)
    map_onnx_fp16 = metrics["map_onnx_fp16"]
    map_onnx_fp32 = metrics["map_onnx_fp32"]

    # Bundle the artifact directory into a single .tar.gz in the current
    # working directory; the archive (not the directory) is what gets uploaded.
    compressed_model_file = shutil.make_archive(
        base_name="model_artifacts", format="gztar", root_dir=input_model_artifacts_path
    )

    # Tag the registered model with build provenance and both mAP scores so
    # they are visible in the AML model registry.
    onnx_model = Model(
        path=compressed_model_file,
        name=registered_model_name,
        description=registered_model_description,
        type=AssetTypes.CUSTOM_MODEL,
        tags={
            "build_reference": build_reference,
            "map_onnx_fp16": map_onnx_fp16,
            "map_onnx_fp32": map_onnx_fp32,
        },
    )
    registered_model = ml_client.models.create_or_update(onnx_model)
    logging.info(f"The registered model ID: {registered_model.id}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--client_id", type=str, help="Azure client id")
    parser.add_argument("--client_secret", type=str, help="Azure client secret")
    parser.add_argument("--tenant_id", type=str, help="Azure tenant id")

    parser.add_argument("--subscription_id", type=str,
                        help="Azure subscription id")
    parser.add_argument(
        "--resource_group_name", type=str, help="Azure Machine learning resource group"
    )
    parser.add_argument(
        "--workspace_name", type=str, help="Azure Machine learning Workspace name"
    )
    parser.add_argument(
        "--input_model_artifacts_path",
        type=str,
        help="The path to the input model artifacts. Should include the ONNX model and the labels.json file.",
    )
    parser.add_argument(
        "--registered_model_name",
        type=str,
        default="fridge-objects-automl-onnx",
        help="The name of the registered model.",
    )
    parser.add_argument(
        "--registered_model_description",
        type=str,
        default="Best AutoML Object Detection ONNX model for fridge objects dataset.",
        help="The description of the registered model.",
    )
    parser.add_argument(
        "--build_reference",
        type=str,
        help="Original AzDo build id that initiated experiment",
    )
    parser.add_argument(
        "--metrics_json_path",
        type=str,
        help="Path to the metrics.json file containing the mAP scores for the ONNX FP32 and FP16 models.",
    )
    args = parser.parse_args()
    main(
        client_id=args.client_id,
        client_secret=args.client_secret,
        tenant_id=args.tenant_id,
        subscription_id=args.subscription_id,
        resource_group_name=args.resource_group_name,
        workspace_name=args.workspace_name,
        input_model_artifacts_path=args.input_model_artifacts_path,
        registered_model_name=args.registered_model_name,
        registered_model_description=args.registered_model_description,
        build_reference=args.build_reference,
        metrics_json_path=args.metrics_json_path,
    )
"""Unit tests to compare two mAP values."""
from contextlib import nullcontext as does_not_raise

import pytest

from model_factory.fridge_obj_det.src.compare_map import compare_map


@pytest.mark.parametrize(
    "map_before,map_after,throws_error,expectation",
    [
        # exact same with tolerance 0.01
        (0.98, 0.97, True, does_not_raise()),
        (0.84, 0.83, True, does_not_raise()),
        # smaller than tolerance 0.01 (improved mAP)
        (0.98, 0.99, True, does_not_raise()),
        # smaller than tolerance 0.01
        (0.98, 0.975, True, does_not_raise()),
        # larger than tolerance 0.01
        (0.98, 0.96, True, pytest.raises(ValueError)),
        (0.98, 0.96, False, does_not_raise()),
        (0.98, 0.969999999999, True, pytest.raises(ValueError)),
        (0.97, 0.959999999999, True, pytest.raises(ValueError)),
    ],
)
def test_compare_map(map_before, map_after, throws_error, expectation):
    """Test compare_map_before_and_after_conversion."""
    with expectation:
        compare_map.compare_scores(
            map_before, map_after, tolerance=0.01, throws_error=throws_error
        )


def test_print():
    """Smoke test: print must run without raising and return None.

    The original version discarded the result of ``print("Hello") is None``
    (a no-op expression) and used a bare ``except:`` that would swallow
    every exception, including SystemExit/KeyboardInterrupt. Asserting the
    return value directly is both correct and lets pytest report any
    unexpected failure with a real traceback.
    """
    assert print("Hello") is None
7 | --------------------------------------------------------------------------------