├── .gitignore ├── CODE_OF_CONDUCT.md ├── CaseStudy.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── config └── model_config.json ├── devops └── pipeline │ ├── build_validation_pipeline.yml │ ├── london_taxi_ci_dev_pipeline.yml │ ├── london_taxi_pr_dev_pipeline.yml │ ├── nyc_taxi_ci_dev_pipeline.yml │ ├── nyc_taxi_pr_dev_pipeline.yml │ ├── platform_ci_dev_pipeline.yml │ ├── platform_pr_dev_pipeline.yml │ ├── requirements │ ├── build_validation_requirements.txt │ └── execute_job_requirements.txt │ └── templates │ ├── configure_azureml_agent.yml │ ├── execute_mlops_pipeline.yml │ ├── experiment_variables.yml │ ├── get_connection_details.yml │ ├── variables_template.yml │ └── wait_with_extension_job.yml ├── docs ├── how_to_setup.md └── images │ ├── ConceptualDesign.png │ ├── DataCollection_Design.png │ ├── MLModelFactory_Design.png │ ├── Orchestrated_Inferencing.png │ ├── ProblemStatement_FishBone.png │ └── UseCaseBuilder_Design.png ├── image.png ├── mlops ├── __init__.py ├── common │ ├── __init__.py │ ├── get_compute.py │ ├── get_environment.py │ ├── get_workspace.py │ └── logger.py ├── london_taxi │ ├── components │ │ ├── predict.yml │ │ ├── prep.yml │ │ ├── register.yml │ │ ├── score.yml │ │ ├── train.yml │ │ └── transform.yml │ ├── data │ │ ├── greenTaxiData.csv │ │ └── yellowTaxiData.csv │ ├── environment │ │ └── conda.yml │ └── src │ │ ├── __init__.py │ │ └── mlops_pipeline.py └── nyc_taxi │ ├── components │ ├── predict.yml │ ├── prep.yml │ ├── register.yml │ ├── score.yml │ ├── train.yml │ └── transform.yml │ ├── data │ ├── greenTaxiData.csv │ └── yellowTaxiData.csv │ ├── environment │ └── conda.yml │ └── src │ ├── __init__.py │ └── mlops_pipeline.py ├── model ├── london_taxi │ ├── dockerfile │ ├── environment │ │ └── requirements.txt │ ├── pipeline-requirements.txt │ ├── sample-request.json │ └── scoring │ │ └── score.py └── nyc_taxi │ ├── dockerfile │ ├── environment │ └── requirements.txt │ ├── pipeline-requirements.txt │ ├── 
sample-request.json │ └── scoring │ └── score.py ├── notebooks └── execute_commands.ipynb ├── src ├── __init__.py ├── london_src │ ├── __init__.py │ ├── predict │ │ ├── __init__.py │ │ └── predict.py │ ├── prep │ │ ├── __init__.py │ │ └── prep.py │ ├── register │ │ ├── __init__.py │ │ └── register.py │ ├── score │ │ ├── __init__.py │ │ └── score.py │ ├── train │ │ ├── __init__.py │ │ └── train.py │ └── transform │ │ ├── __init__.py │ │ └── transform.py └── nyc_src │ ├── __init__.py │ ├── predict │ ├── __init__.py │ └── predict.py │ ├── prep │ ├── __init__.py │ └── prep.py │ ├── register │ ├── __init__.py │ └── register.py │ ├── score │ ├── __init__.py │ └── score.py │ ├── train │ ├── __init__.py │ └── train.py │ └── transform │ ├── __init__.py │ └── transform.py ├── telco_case_study_implementation └── fridge_object_detection │ ├── .gitignore │ ├── README.md │ ├── docs │ ├── 01-model-factory-design.md │ ├── 02-instructions.md │ └── assets │ │ └── images │ │ └── model_factory_design.jpg │ └── model_factory │ ├── __init__.py │ ├── common │ ├── __init__.py │ ├── devops │ │ └── templates │ │ │ ├── configure_azureml_agent.yml │ │ │ ├── execute_mlops_pipeline.yml │ │ │ ├── experiment_variables.yml │ │ │ ├── get_connection_details.yml │ │ │ ├── image_generation_template.yml │ │ │ ├── platform_dev_pipeline.yml │ │ │ ├── platform_main_pipeline.yml │ │ │ └── variables_template.yml │ ├── logging │ │ ├── __init__.py │ │ └── logger.py │ └── mlops │ │ ├── __init__.py │ │ ├── get_aml_client.py │ │ ├── get_compute.py │ │ ├── get_environment.py │ │ └── get_workspace.py │ └── fridge_obj_det │ ├── Makefile │ ├── __init__.py │ ├── config │ └── model_config.json │ ├── devops │ └── pipelines │ │ ├── build_validation_pipeline.yml │ │ ├── fridge_obj_det_dev_pipeline.yml │ │ ├── fridge_obj_det_main_pipeline.yml │ │ ├── fridge_obj_det_mlops_pipeline.yml │ │ └── requirements │ │ ├── build_validation_requirements.txt │ │ └── execute_job_requirements.txt │ ├── environment │ └── requirements.txt 
│ ├── mlops │ ├── .gitkeep │ ├── __init__.py │ ├── components │ │ ├── compare_map.yml │ │ ├── convert.yml │ │ ├── prep.yml │ │ ├── register.yml │ │ ├── score.yml │ │ └── train.yml │ ├── environment │ │ ├── conda.yml │ │ └── create_devenv.py │ └── src │ │ ├── __init__.py │ │ └── mlops_pipeline.py │ ├── model │ ├── .python-version │ ├── Dockerfile │ ├── __init__.py │ ├── model_artifacts │ │ ├── labels.json │ │ └── test_sample.jpg │ ├── package.json │ ├── poetry.lock │ ├── pyproject.toml │ ├── scoring │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── prepare.py │ │ └── score.py │ └── server │ │ └── __main__.py │ ├── src │ ├── __init__.py │ ├── compare_map │ │ ├── __init__.py │ │ ├── compare_map.py │ │ └── compare_pipeline.py │ ├── convert │ │ ├── convert_fp32_to_fp16.py │ │ └── convert_pipeline.py │ ├── prep │ │ ├── __init__.py │ │ ├── prep.py │ │ └── voc_jsonl_converter.py │ ├── register │ │ ├── __init__.py │ │ └── register.py │ ├── score │ │ ├── __init__.py │ │ └── score.py │ └── train │ │ ├── __init__.py │ │ └── train.py │ └── test │ ├── __init__.py │ └── test_compare_map.py └── test ├── __init__.py ├── london_taxi ├── __init__.py └── test_to_delete.py ├── nyc_taxi ├── __init__.py └── test_to_delete.py └── test_to_delete.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLOps Model Factory Accelerator 2 | 3 | > **Note:** 4 | > This is a repo that can be shared to our customers. 
This means it's NOT OK to include Microsoft confidential 5 | > content. All discussions should be appropriate for a public audience. 6 | 7 | MLOps Model Factory is a platform and an end-to-end workflow that supports generating multiple models that can be deployed to any target. 8 | 9 | ## Features 10 | 11 | - Supports generation of multiple ML Models through a single platform and repo 12 | - MLOps pipeline for Data preparation, transformation, Model Training, evaluation, scoring and registration 13 | - Based on Azure ML SDK v2 1.4 14 | - Option to package ML Models in Docker Images 15 | 16 | 17 | 18 | ## About this repo 19 | 20 | The idea of this platform and end-to-end workflow is to provide a minimum number of scripts to implement an environment to train and test multiple ML Models using Azure ML SDK v2 and Azure DevOps. 21 | 22 | The workflow contains the following folders/files: 23 | 24 | - devops: the folder contains Azure DevOps related files (yaml files to define Builds). 25 | - docs: documentation. 26 | - src: source code that is not related to Azure ML directly. This is typically data science related code. 27 | - mlops: scripts that are related to Azure ML. 28 | - mlops/nyc_taxi: a fake pipeline with some basic code to build a model 29 | - mlops/london_taxi: a fake pipeline with some basic code to build another model 30 | - test: a folder with a dummy test to write unit tests for the build 31 | - model: Model related files and dependencies 32 | 33 | - .amlignore: using this file we are removing all the folders and files that are not supposed to be in Azure ML compute. 34 | 35 | The workflow contains the following documents: 36 | 37 | - docs/how_to_setup.md: explains how to configure the workflow. 38 | 39 | ## How to use the repo 40 | 41 | Information about how to set up the repo is in [the following document](./docs/how_to_setup.md). 
42 | 43 | ## Local experimentation 44 | 45 | Developers and Data scientists can use the [execute-command](./notebooks/execute_commands.ipynb) in the `notebooks` to try out the commands in the AML compute from their local machine. 46 | 47 | ## Reference 48 | 49 | * [Azure Machine learning](https://docs.microsoft.com/azure/machine-learning) 50 | * [Azure DevOps pipelines](https://learn.microsoft.com/en-gb/azure/devops/pipelines/) 51 | * [Azure Machine learning SDK V2](https://learn.microsoft.com/en-gb/python/api/overview/azure/ai-ml-readme?view=azure-python) 52 | * [Azure AD Service Principal](https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal) 53 | * [Azure Key Vault](https://learn.microsoft.com/en-gb/azure/key-vault/general/) -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 
14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 
26 | -------------------------------------------------------------------------------- /config/model_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "models":[ 3 | { 4 | "ML_MODEL_CONFIG_NAME": "london_taxi", 5 | "ENV_NAME": "dev", 6 | "CLUSTER_NAME": "new-cluster", 7 | "CLUSTER_REGION": "eastus", 8 | "CLUSTER_SIZE": "STANDARD_DS3_v2", 9 | "CONDA_PATH": "mlops/london_taxi/environment/conda.yml", 10 | "DISPLAY_BASE_NAME": "mlops", 11 | "ENV_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04", 12 | "ENVIRONMENT_NAME": "sklearn-python3", 13 | "EXPERIMENT_BASE_NAME": "londontaxi", 14 | "KEYVAULT_NAME": "researchmlops5963078644", 15 | "MODEL_BASE_NAME": "regr", 16 | "RESOURCE_GROUP_NAME": "mlops", 17 | "WORKSPACE_NAME": "researchmlops" 18 | }, 19 | { 20 | "ML_MODEL_CONFIG_NAME": "nyc_taxi", 21 | "ENV_NAME": "dev", 22 | "CLUSTER_NAME": "new-cluster", 23 | "CLUSTER_REGION": "eastus", 24 | "CLUSTER_SIZE": "STANDARD_DS3_v2", 25 | "CONDA_PATH": "mlops/nyc_taxi/environment/conda.yml", 26 | "DISPLAY_BASE_NAME": "mlops", 27 | "ENV_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04", 28 | "ENVIRONMENT_NAME": "sklearn-python3", 29 | "EXPERIMENT_BASE_NAME": "nyctaxi", 30 | "KEYVAULT_NAME": "researchmlops5963078644", 31 | "MODEL_BASE_NAME": "cls", 32 | "RESOURCE_GROUP_NAME": "mlops", 33 | "WORKSPACE_NAME": "researchmlops" 34 | } 35 | ] 36 | } -------------------------------------------------------------------------------- /devops/pipeline/build_validation_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: model_type 3 | displayName: "type of model to execute" 4 | 5 | jobs: 6 | - job: Build_Validation_Pipeline 7 | steps: 8 | - task: UsePythonVersion@0 9 | displayName: 'Use Python 3.8' 10 | inputs: 11 | versionSpec: '3.8' 12 | 13 | - script: | 14 | python -m pip install --upgrade pip 15 | pip install -r 
devops/pipeline/requirements/build_validation_requirements.txt 16 | displayName: "Load Python Dependencies" 17 | 18 | 19 | - script: | 20 | pytest test/${{ parameters.model_type }} --ignore=sandbox/ --junitxml=junit/test-results.xml --cov=. --cov-report=xml 21 | displayName: 'Run Unit Tests' 22 | condition: succeededOrFailed() 23 | 24 | - task: PublishTestResults@2 25 | condition: succeededOrFailed() 26 | inputs: 27 | testResultsFiles: '**/test-*.xml' 28 | testRunTitle: 'Publish Test Results for Python $(python.version)' 29 | 30 | - task: PublishCodeCoverageResults@1 31 | inputs: 32 | codeCoverageTool: Cobertura 33 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' -------------------------------------------------------------------------------- /devops/pipeline/london_taxi_ci_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | pr: none 2 | trigger: 3 | branches: 4 | include: 5 | - master 6 | paths: 7 | include: 8 | - devops/* 9 | - mlops/common/* 10 | - mlops/london_taxi/* 11 | - src/london_src/* 12 | - model/london_taxi/* 13 | - src/shared/* 14 | 15 | pool: 16 | vmImage: ubuntu-latest 17 | 18 | 19 | variables: 20 | - group: mlops_platform_dev_vg 21 | - name: PIPELINE_TYPE 22 | value: london_taxi 23 | 24 | parameters: 25 | - name: env_name 26 | displayName: "Execution Environment" 27 | default: "dev" 28 | - name: model_type 29 | displayName: "type of model to execute" 30 | default: "london_taxi" 31 | 32 | stages: 33 | - template: templates/variables_template.yml 34 | parameters: 35 | env_name: ${{parameters.env_name}} 36 | model_type: ${{parameters.model_type}} 37 | - template: platform_ci_dev_pipeline.yml 38 | parameters: 39 | exec_environment: ${{ parameters.env_name }} 40 | model_type: ${{ parameters.model_type }} -------------------------------------------------------------------------------- /devops/pipeline/london_taxi_pr_dev_pipeline.yml: 
-------------------------------------------------------------------------------- 1 | trigger: none 2 | pr: 3 | branches: 4 | include: 5 | - development 6 | paths: 7 | include: 8 | - devops/* 9 | - mlops/common/* 10 | - mlops/london_taxi/* 11 | - src/london_src/* 12 | - model/london_taxi/* 13 | - src/shared/* 14 | 15 | pool: 16 | vmImage: ubuntu-latest 17 | 18 | 19 | variables: 20 | - group: mlops_platform_dev_vg 21 | - name: PIPELINE_TYPE 22 | value: london_taxi 23 | 24 | parameters: 25 | - name: env_name 26 | displayName: "Execution Environment" 27 | default: "dev" 28 | - name: model_type 29 | displayName: "type of model to execute" 30 | default: "london_taxi" 31 | 32 | stages: 33 | - template: templates/variables_template.yml 34 | parameters: 35 | env_name: ${{parameters.env_name}} 36 | model_type: ${{parameters.model_type}} 37 | - template: platform_pr_dev_pipeline.yml 38 | parameters: 39 | exec_environment: ${{ parameters.env_name }} 40 | model_type: ${{ parameters.model_type }} -------------------------------------------------------------------------------- /devops/pipeline/nyc_taxi_ci_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | pr: none 2 | trigger: 3 | branches: 4 | include: 5 | - master 6 | paths: 7 | include: 8 | - devops/* 9 | - mlops/common/* 10 | - mlops/nyc_taxi/* 11 | - src/nyc_src/* 12 | - model/nyc_taxi/* 13 | - src/shared/* 14 | 15 | pool: 16 | vmImage: ubuntu-latest 17 | 18 | 19 | variables: 20 | - group: mlops_platform_dev_vg 21 | - name: PIPELINE_TYPE 22 | value: nyc_taxi 23 | 24 | parameters: 25 | - name: env_name 26 | displayName: "Execution Environment" 27 | default: "dev" 28 | - name: model_type 29 | displayName: "type of model to execute" 30 | default: "nyc_taxi" 31 | 32 | stages: 33 | - template: templates/variables_template.yml 34 | parameters: 35 | env_name: ${{parameters.env_name}} 36 | model_type: ${{parameters.model_type}} 37 | - template: platform_ci_dev_pipeline.yml 38 | 
parameters: 39 | exec_environment: ${{ parameters.env_name }} 40 | model_type: ${{ parameters.model_type }} -------------------------------------------------------------------------------- /devops/pipeline/nyc_taxi_pr_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | trigger: none 2 | pr: 3 | branches: 4 | include: 5 | - development 6 | paths: 7 | include: 8 | - devops/* 9 | - mlops/common/* 10 | - mlops/nyc_taxi/* 11 | - src/nyc_src/* 12 | - model/nyc_taxi/* 13 | - src/shared/* 14 | 15 | pool: 16 | vmImage: ubuntu-latest 17 | 18 | 19 | variables: 20 | - group: mlops_platform_dev_vg 21 | - name: PIPELINE_TYPE 22 | value: nyc_taxi 23 | 24 | parameters: 25 | - name: env_name 26 | displayName: "Execution Environment" 27 | default: "dev" 28 | - name: model_type 29 | displayName: "type of model to execute" 30 | default: "nyc_taxi" 31 | 32 | stages: 33 | - template: templates/variables_template.yml 34 | parameters: 35 | env_name: ${{parameters.env_name}} 36 | model_type: ${{parameters.model_type}} 37 | - template: platform_pr_dev_pipeline.yml 38 | parameters: 39 | exec_environment: ${{ parameters.env_name }} 40 | model_type: ${{ parameters.model_type }} -------------------------------------------------------------------------------- /devops/pipeline/platform_ci_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | 2 | parameters: 3 | - name: exec_environment 4 | displayName: "Execution Environment" 5 | default: "dev" 6 | - name: model_type 7 | displayName: "type of model to execute" 8 | 9 | 10 | stages: 11 | - stage: execute_training_job 12 | displayName: execute_training_job 13 | dependsOn: 14 | - variable_generation 15 | variables: 16 | - template: templates/experiment_variables.yml 17 | jobs: 18 | - job: Execute_ml_Job_Pipeline 19 | steps: 20 | - template: templates/get_connection_details.yml 21 | - template: templates/configure_azureml_agent.yml 22 | - template: 
templates/execute_mlops_pipeline.yml 23 | parameters: 24 | script_parameter: | 25 | python -m mlops.${{ parameters.model_type }}.src.mlops_pipeline \ 26 | --subscription_id $(SUBSCRIPTION_ID) \ 27 | --resource_group_name $(RESOURCE_GROUP_NAME) \ 28 | --workspace_name $(WORKSPACE_NAME) \ 29 | --cluster_name $(CLUSTER_NAME) \ 30 | --cluster_size $(CLUSTER_SIZE) \ 31 | --cluster_region $(CLUSTER_REGION) \ 32 | --build_reference $(BUILD.BUILDID) \ 33 | --deploy_environment ${{parameters.exec_environment}} \ 34 | --experiment_name $(EXPERIMENT_NAME) \ 35 | --display_name $(DISPLAY_NAME) \ 36 | --wait_for_completion True \ 37 | --environment_name $(ENVIRONMENT_NAME) \ 38 | --env_base_image_name $(ENV_BASE_IMAGE_NAME) \ 39 | --model_name $(MODEL_NAME) \ 40 | --conda_path $(CONDA_PATH) \ 41 | --output_file run_id.txt 42 | 43 | -------------------------------------------------------------------------------- /devops/pipeline/platform_pr_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | 2 | parameters: 3 | - name: exec_environment 4 | displayName: "Execution Environment" 5 | default: "dev" 6 | - name: model_type 7 | displayName: "type of model to execute" 8 | 9 | 10 | stages: 11 | - stage: build_validation 12 | displayName: build_validation 13 | dependsOn: 14 | - variable_generation 15 | variables: 16 | - template: templates/experiment_variables.yml 17 | jobs: 18 | - template: build_validation_pipeline.yml 19 | parameters: 20 | model_type: ${{ parameters.model_type }} 21 | - stage: execute_training_job 22 | displayName: execute_training_job 23 | dependsOn: 24 | - variable_generation 25 | - build_validation 26 | variables: 27 | - template: templates/experiment_variables.yml 28 | jobs: 29 | - job: Execute_ml_Job_Pipeline 30 | steps: 31 | - template: templates/get_connection_details.yml 32 | - template: templates/configure_azureml_agent.yml 33 | - template: templates/execute_mlops_pipeline.yml 34 | parameters: 35 | 
script_parameter: | 36 | python -m mlops.${{ parameters.model_type }}.src.mlops_pipeline \ 37 | --subscription_id $(SUBSCRIPTION_ID) \ 38 | --resource_group_name $(RESOURCE_GROUP_NAME) \ 39 | --workspace_name $(WORKSPACE_NAME) \ 40 | --cluster_name $(CLUSTER_NAME) \ 41 | --cluster_size $(CLUSTER_SIZE) \ 42 | --cluster_region $(CLUSTER_REGION) \ 43 | --build_reference $(BUILD.BUILDID) \ 44 | --deploy_environment ${{parameters.exec_environment}} \ 45 | --experiment_name $(EXPERIMENT_NAME) \ 46 | --display_name $(DISPLAY_NAME) \ 47 | --wait_for_completion True \ 48 | --environment_name $(ENVIRONMENT_NAME) \ 49 | --env_base_image_name $(ENV_BASE_IMAGE_NAME) \ 50 | --model_name $(MODEL_NAME) \ 51 | --conda_path $(CONDA_PATH) 52 | -------------------------------------------------------------------------------- /devops/pipeline/requirements/build_validation_requirements.txt: -------------------------------------------------------------------------------- 1 | flake8-docstrings==1.6.0 2 | flake8==4.0.1 3 | pep8-naming==0.13.0 4 | pytest-cov==3.0.0 5 | pytest-azurepipelines==1.0.3 6 | pytest-mock==3.7.0 7 | pytest==7.1.2 8 | mlflow==1.27.0 9 | azure-ai-ml==1.5.0 10 | azure-identity==1.11.0 11 | mldesigner==0.1.0b4 12 | -------------------------------------------------------------------------------- /devops/pipeline/requirements/execute_job_requirements.txt: -------------------------------------------------------------------------------- 1 | azure-cli==2.53.0 2 | azure-ai-ml==1.5.0 3 | azure-identity==1.11.0 4 | flake8-docstrings==1.6.0 5 | flake8==4.0.1 6 | pep8-naming==0.13.0 7 | pytest-cov==3.0.0 8 | pytest-azurepipelines==1.0.3 9 | pytest-mock==3.7.0 10 | pytest==7.1.2 11 | mlflow==2.7.1 12 | mldesigner==0.1.0b4 -------------------------------------------------------------------------------- /devops/pipeline/templates/configure_azureml_agent.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - task: UsePythonVersion@0 3 | 
displayName: 'Use Python 3.8' 4 | inputs: 5 | versionSpec: '3.8' 6 | 7 | - task: AzureCLI@2 8 | displayName: Install Job Requirements 9 | inputs: 10 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 11 | scriptType: bash 12 | scriptLocation: inlineScript 13 | workingDirectory: $(System.DefaultWorkingDirectory) 14 | inlineScript: | 15 | set -e # fail on error 16 | python -m pip install --upgrade pip 17 | pip install -r devops/pipeline/requirements/execute_job_requirements.txt 18 | az version 19 | 20 | 21 | -------------------------------------------------------------------------------- /devops/pipeline/templates/execute_mlops_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: script_parameter 3 | type: string 4 | 5 | steps: 6 | - task: AzureCLI@2 7 | name: submit_aml_job_task 8 | displayName: Execute Azure ML pipeline job 9 | continueOnError: false 10 | env: {APPLICATIONINSIGHTS_CONNECTION_STRING: "$(APPLICATIONINSIGHTS-CONNECTION-STRING)"} 11 | inputs: 12 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 13 | scriptType: bash 14 | workingDirectory: $(System.DefaultWorkingDirectory) 15 | scriptLocation: inlineScript 16 | inlineScript: | 17 | ${{parameters.script_parameter}} 18 | -------------------------------------------------------------------------------- /devops/pipeline/templates/experiment_variables.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | - name: ML_MODEL_CONFIG_NAME 3 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ML_MODEL_CONFIG_NAME'] ] 4 | - name: KEYVAULT_NAME 5 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.KEYVAULT_NAME'] ] 6 | - name: EXPERIMENT_BASE_NAME 7 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.EXPERIMENT_BASE_NAME'] ] 8 | - name: ENVIRONMENT_NAME 9 | 
value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ENVIRONMENT_NAME'] ] 10 | - name: ENV_BASE_IMAGE_NAME 11 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ENV_BASE_IMAGE_NAME'] ] 12 | - name: DISPLAY_BASE_NAME 13 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.DISPLAY_BASE_NAME'] ] 14 | - name: CONDA_PATH 15 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CONDA_PATH'] ] 16 | - name: CLUSTER_SIZE 17 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_SIZE'] ] 18 | - name: CLUSTER_REGION 19 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_REGION'] ] 20 | - name: CLUSTER_NAME 21 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_NAME'] ] 22 | - name: AZURE_RM_SVC_CONNECTION 23 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.AZURE_RM_SVC_CONNECTION'] ] 24 | - name: MODEL_BASE_NAME 25 | value: $[ dependencies.variable_generation.load_config_variables.outputs['loading_model_config.MODEL_BASE_NAME'] ] 26 | - name: RESOURCE_GROUP_NAME 27 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.RESOURCE_GROUP_NAME'] ] 28 | - name: WORKSPACE_NAME 29 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.WORKSPACE_NAME'] ] 30 | - name: EXPERIMENT_NAME 31 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.EXPERIMENT_NAME'] ] 32 | - name: DISPLAY_NAME 33 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.DISPLAY_NAME'] ] 34 | - name: MODEL_NAME 35 | 
value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.MODEL_NAME'] ] -------------------------------------------------------------------------------- /devops/pipeline/templates/get_connection_details.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - task: AzureCLI@2 3 | name: retrieveAzureServiceConnection 4 | displayName: Retrieve Azure Service Connection 5 | inputs: 6 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 7 | scriptLocation: inlineScript 8 | scriptType: bash 9 | inlineScript: | 10 | export subscriptionId=$(az account show --query id -o tsv) 11 | echo "##vso[task.setvariable variable=SUBSCRIPTION_ID]$subscriptionId" 12 | echo "##vso[task.setvariable variable=TENANT_ID]$tenantId" 13 | addSpnToEnvironment: true -------------------------------------------------------------------------------- /devops/pipeline/templates/variables_template.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: env_name 3 | displayName: "Execution Environment" 4 | - name: model_type 5 | displayName: "type of model to execute" 6 | 7 | stages: 8 | - stage: variable_generation 9 | jobs: 10 | - job: load_config_variables 11 | steps: 12 | - powershell: | 13 | $json = Get-Content -Raw -Path '$(System.DefaultWorkingDirectory)/config/model_config.json' | ConvertFrom-Json 14 | $firstElement = $json.models | Where-Object {($_.ML_MODEL_CONFIG_NAME -eq "${{ parameters.model_type }}") -and ($_.ENV_NAME -eq "${{ parameters.env_name }}")} | Select-Object -First 1 15 | 16 | Write-Output $firstElement.KEYVAULT_NAME 17 | 18 | foreach ($property in $firstElement.PSObject.Properties) { 19 | $pname = $property.Name 20 | $pvalue = $property.Value 21 | Write-Output "##vso[task.setvariable variable=$pname;isoutput=true]$pvalue" 22 | } 23 | 24 | $EXPERIMENT_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.EXPERIMENT_BASE_NAME)" + "_" 
+ "${{parameters.env_name}}" + "_" + "$(Build.SourceBranchName)" 25 | Write-Output "##vso[task.setvariable variable=EXPERIMENT_NAME;isoutput=true]$EXPERIMENT_NAME" 26 | 27 | $DISPLAY_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.DISPLAY_BASE_NAME)" + "_" + "${{parameters.env_name}}" + "_" + "$(Build.BuildID)" 28 | Write-Output "##vso[task.setvariable variable=DISPLAY_NAME;isoutput=true]$DISPLAY_NAME" 29 | Write-Output $DISPLAY_NAME 30 | $MODEL_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.MODEL_BASE_NAME)" + "_" + "${{parameters.env_name}}" + "_" + "$(Build.SourceBranchName)" 31 | Write-Output "##vso[task.setvariable variable=MODEL_NAME;isoutput=true]$MODEL_NAME" 32 | name: loading_model_config 33 | 34 | - job: validate_assign_variables 35 | dependsOn: load_config_variables 36 | variables: 37 | - name: ML_MODEL_CONFIG_NAME 38 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ML_MODEL_CONFIG_NAME'] ] 39 | - name: KEYVAULT_NAME 40 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.KEYVAULT_NAME'] ] 41 | - name: EXPERIMENT_BASE_NAME 42 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.EXPERIMENT_BASE_NAME'] ] 43 | - name: ENVIRONMENT_NAME 44 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ENVIRONMENT_NAME'] ] 45 | - name: ENV_BASE_IMAGE_NAME 46 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ENV_BASE_IMAGE_NAME'] ] 47 | - name: DISPLAY_BASE_NAME 48 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.DISPLAY_BASE_NAME'] ] 49 | - name: CONDA_PATH 50 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CONDA_PATH'] ] 51 | - name: CLUSTER_SIZE 52 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_SIZE'] ] 53 | - name: CLUSTER_REGION 54 | value: $[ 
dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_REGION'] ] 55 | - name: CLUSTER_NAME 56 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_NAME'] ] 57 | - name: AZURE_RM_SVC_CONNECTION 58 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.AZURE_RM_SVC_CONNECTION'] ] 59 | - name: MODEL_BASE_NAME 60 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.MODEL_BASE_NAME'] ] 61 | - name: RESOURCE_GROUP_NAME 62 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.RESOURCE_GROUP_NAME'] ] 63 | - name: WORKSPACE_NAME 64 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.WORKSPACE_NAME'] ] 65 | - name: EXPERIMENT_NAME 66 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.EXPERIMENT_NAME'] ] 67 | - name: DISPLAY_NAME 68 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.DISPLAY_NAME'] ] 69 | - name: MODEL_NAME 70 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.MODEL_NAME'] ] 71 | steps: 72 | - script: | 73 | if [ -z "$(ML_MODEL_CONFIG_NAME)" ] 74 | then 75 | echo "variables are not available. Check parameter values or config json file for valid values.." 76 | exit 1 77 | else 78 | echo "variables were loaded from config file.." 
79 | printenv 80 | fi 81 | name: validate_variable_load 82 | - task: AzureKeyVault@2 83 | continueOnError: false 84 | inputs: 85 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 86 | KeyVaultName: $(KEYVAULT_NAME) 87 | SecretsFilter: '*' 88 | RunAsPreJob: false 89 | name: load_keyvault_secrets 90 | 91 | 92 | -------------------------------------------------------------------------------- /devops/pipeline/templates/wait_with_extension_job.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | - job: WaitForCallback 3 | pool: server 4 | timeoutInMinutes: 0 5 | dependsOn: Execute_Job_Pipeline 6 | variables: 7 | - name: run_name_from_submit_job 8 | value: $[ dependencies.Execute_Job_Pipeline.outputs['read_run_id.RUN_NAME'] ] 9 | 10 | steps: 11 | - task: AzureMLJobWaitTask@0 12 | inputs: 13 | serviceConnection: $(AZURE_RM_SVC_CONNECTION) 14 | resourceGroupName: $(RESOURCE_GROUP_NAME) 15 | azureMLWorkspaceName: $(WORKSPACE_NAME) 16 | azureMLWorkspaceLocation: $(CLUSTER_REGION) 17 | azureMLJobName: $(run_name_from_submit_job) 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/images/ConceptualDesign.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/ConceptualDesign.png -------------------------------------------------------------------------------- /docs/images/DataCollection_Design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/DataCollection_Design.png -------------------------------------------------------------------------------- /docs/images/MLModelFactory_Design.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/MLModelFactory_Design.png -------------------------------------------------------------------------------- /docs/images/Orchestrated_Inferencing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/Orchestrated_Inferencing.png -------------------------------------------------------------------------------- /docs/images/ProblemStatement_FishBone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/ProblemStatement_FishBone.png -------------------------------------------------------------------------------- /docs/images/UseCaseBuilder_Design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/docs/images/UseCaseBuilder_Design.png -------------------------------------------------------------------------------- /image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/image.png -------------------------------------------------------------------------------- /mlops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/mlops/__init__.py -------------------------------------------------------------------------------- /mlops/common/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/mlops/common/__init__.py -------------------------------------------------------------------------------- /mlops/common/get_compute.py: -------------------------------------------------------------------------------- 1 | from azure.ai.ml import MLClient 2 | from azure.identity import DefaultAzureCredential 3 | import argparse 4 | from azure.ai.ml.entities import AmlCompute 5 | from mlops.common.logger import get_logger 6 | 7 | logger = get_logger() 8 | 9 | def get_compute( 10 | subscription_id: str, 11 | resource_group_name: str, 12 | workspace_name: str, 13 | cluster_name: str, 14 | cluster_size: str, 15 | cluster_region: str, 16 | min_instances: int, 17 | max_instances: int, 18 | idle_time_before_scale_down: int, 19 | ): 20 | compute_object = None 21 | try: 22 | client = MLClient( 23 | DefaultAzureCredential(), 24 | subscription_id=subscription_id, 25 | resource_group_name=resource_group_name, 26 | workspace_name=workspace_name, 27 | ) 28 | try: 29 | compute_object = client.compute.get(cluster_name) 30 | logger.info(f"Found existing compute target {cluster_name}, so using it.") 31 | except: 32 | logger.info(f"{cluster_name} is not found! Trying to create a new one.") 33 | compute_object = AmlCompute( 34 | name=cluster_name, 35 | type="amlcompute", 36 | size=cluster_size, 37 | location=cluster_region, 38 | min_instances=min_instances, 39 | max_instances=max_instances, 40 | idle_time_before_scale_down=idle_time_before_scale_down, 41 | ) 42 | compute_object = client.compute.begin_create_or_update( 43 | compute_object 44 | ).result() 45 | logger.info(f"A new cluster {cluster_name} has been created.") 46 | except Exception as ex: 47 | logger.exception("Oops! invalid credentials.. 
Try again...") 48 | raise 49 | return compute_object 50 | 51 | 52 | def main(): 53 | parser = argparse.ArgumentParser("get_compute") 54 | parser.add_argument("--subscription_id", type=str, help="Azure subscription id") 55 | parser.add_argument( 56 | "--resource_group_name", type=str, help="Azure Machine learning resource group" 57 | ) 58 | parser.add_argument( 59 | "--workspace_name", type=str, help="Azure Machine learning Workspace name" 60 | ) 61 | parser.add_argument( 62 | "--cluster_name", type=str, help="Azure Machine learning cluster name" 63 | ) 64 | parser.add_argument( 65 | "--cluster_size", type=str, help="Azure Machine learning cluster size" 66 | ) 67 | parser.add_argument( 68 | "--cluster_region", type=str, help="Azure Machine learning cluster region" 69 | ) 70 | parser.add_argument("--min_instances", type=int, default=0) 71 | parser.add_argument("--max_instances", type=int, default=4) 72 | parser.add_argument("--idle_time_before_scale_down", type=int, default=120) 73 | 74 | args = parser.parse_args() 75 | get_compute( 76 | args.subscription_id, 77 | args.resource_group_name, 78 | args.workspace_name, 79 | args.cluster_name, 80 | args.cluster_size, 81 | args.cluster_region, 82 | args.min_instances, 83 | args.max_instances, 84 | args.idle_time_before_scale_down, 85 | ) 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /mlops/common/get_environment.py: -------------------------------------------------------------------------------- 1 | from azure.ai.ml import MLClient 2 | from azure.identity import DefaultAzureCredential 3 | import argparse 4 | from azure.ai.ml.entities import Environment 5 | from mlops.common.logger import get_logger 6 | 7 | logger = get_logger() 8 | 9 | 10 | def get_environment( 11 | subscription_id: str, 12 | resource_group_name: str, 13 | workspace_name: str, 14 | env_base_image_name: str, 15 | conda_path: str, 16 | environment_name: str, 17 | 
description: str, 18 | ): 19 | try: 20 | logger.info(f"Checking {environment_name} environment.") 21 | client = MLClient( 22 | DefaultAzureCredential(), 23 | subscription_id=subscription_id, 24 | resource_group_name=resource_group_name, 25 | workspace_name=workspace_name, 26 | ) 27 | env_docker_conda = Environment( 28 | image=env_base_image_name, 29 | conda_file=conda_path, 30 | name=environment_name, 31 | description=description, 32 | ) 33 | environment = client.environments.create_or_update(env_docker_conda) 34 | logger.info(f"Environment {environment_name} has been created or updated.") 35 | return environment 36 | 37 | except Exception as ex: 38 | logger.exception( 39 | "Oops! invalid credentials or error while creating ML environment.. Try again..." 40 | ) 41 | raise 42 | 43 | 44 | def main(): 45 | parser = argparse.ArgumentParser("prepare_environment") 46 | parser.add_argument("--subscription_id", type=str, help="Azure subscription id") 47 | parser.add_argument( 48 | "--resource_group_name", type=str, help="Azure Machine learning resource group" 49 | ) 50 | parser.add_argument( 51 | "--workspace_name", type=str, help="Azure Machine learning Workspace name" 52 | ) 53 | parser.add_argument( 54 | "--env_base_image_name", type=str, help="Environment custom base image name" 55 | ) 56 | parser.add_argument( 57 | "--conda_path", type=str, help="path to conda requirements file" 58 | ) 59 | parser.add_argument( 60 | "--environment_name", type=str, help="Azure Machine learning environment name" 61 | ) 62 | parser.add_argument( 63 | "--description", type=str, default="Environment created using Conda." 
64 | ) 65 | args = parser.parse_args() 66 | 67 | get_environment( 68 | args.subscription_id, 69 | args.resource_group_name, 70 | args.workspace_name, 71 | args.env_base_image_name, 72 | args.conda_path, 73 | args.environment_name, 74 | args.description, 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /mlops/common/get_workspace.py: -------------------------------------------------------------------------------- 1 | from azure.ai.ml import MLClient 2 | from azure.identity import DefaultAzureCredential 3 | import argparse 4 | from mlops.common.logger import get_logger 5 | 6 | logger = get_logger() 7 | 8 | def get_workspace(subscription_id: str, resource_group_name: str, workspace_name: str): 9 | try: 10 | logger.info(f"Getting access to {workspace_name} workspace.") 11 | client = MLClient( 12 | DefaultAzureCredential(), 13 | subscription_id=subscription_id, 14 | resource_group_name=resource_group_name, 15 | workspace_name=workspace_name, 16 | ) 17 | 18 | workspace = client.workspaces.get(workspace_name) 19 | logger.info(f"Reference to {workspace_name} has been obtained.") 20 | return workspace 21 | except Exception as ex: 22 | logger.exception("Oops! invalid credentials.. 
Try again...") 23 | raise 24 | 25 | 26 | def main(): 27 | parser = argparse.ArgumentParser("get_workspace") 28 | parser.add_argument("--subscription_id", type=str, help="Azure subscription id") 29 | parser.add_argument( 30 | "--resource_group_name", type=str, help="Azure Machine learning resource group" 31 | ) 32 | parser.add_argument( 33 | "--workspace_name", type=str, help="Azure Machine learning Workspace name" 34 | ) 35 | 36 | args = parser.parse_args() 37 | get_workspace(args.subscription_id, args.resource_group_name, args.workspace_name) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /mlops/common/logger.py: -------------------------------------------------------------------------------- 1 | """Reusable logger for model_factory.""" 2 | import logging 3 | import sys 4 | 5 | 6 | def get_logger(name: str = "mlops", level: int = logging.INFO) -> logging.Logger: 7 | """Get logger. 8 | 9 | Args: 10 | name (str, optional): Logger name. Defaults to "mlops". 11 | level (int, optional): Log level. Defaults to logging.INFO. 12 | 13 | Returns: 14 | logging.Logger: named logger. 
15 | """ 16 | logger = logging.getLogger(name) 17 | if logger.hasHandlers(): 18 | return logger 19 | 20 | handler = logging.StreamHandler(sys.stdout) 21 | formatter = logging.Formatter( 22 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 23 | ) 24 | handler.setFormatter(formatter) 25 | 26 | logger.setLevel(level) 27 | logger.addHandler(handler) 28 | 29 | return logger 30 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/predict.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: predict_taxi_fares 3 | version: 1 4 | display_name: PredictTaxiFares 5 | type: command 6 | inputs: 7 | model_input: 8 | type: mlflow_model 9 | test_data: 10 | type: uri_folder 11 | outputs: 12 | predictions: 13 | type: uri_folder 14 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 15 | code: ./../../../ 16 | command: >- 17 | python -m src.london_src.predict.predict 18 | --model_input ${{inputs.model_input}} 19 | --test_data ${{inputs.test_data}} 20 | --predictions ${{outputs.predictions}} 21 | 22 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/prep.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: prepare_taxi_data 3 | display_name: PrepTaxiData 4 | version: 1 5 | type: command 6 | inputs: 7 | raw_data: 8 | type: uri_folder 9 | outputs: 10 | prep_data: 11 | type: uri_folder 12 | code: ./../../../ 13 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 14 | environment_variables: 15 | ritesh: modi 16 | command: >- 17 | python -m src.london_src.prep.prep 18 | --raw_data ${{inputs.raw_data}} 19 | --prep_data ${{outputs.prep_data}} 20 | 21 | 
-------------------------------------------------------------------------------- /mlops/london_taxi/components/register.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: register_taxi_model 3 | display_name: RegisterTaxiModel 4 | version: 1 5 | type: command 6 | inputs: 7 | model_metadata: 8 | type: uri_folder 9 | model_name: 10 | type: string 11 | score_report: 12 | type: uri_folder 13 | build_reference: 14 | type: string 15 | code: ./../../../ 16 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 17 | command: >- 18 | python -m src.london_src.register.register 19 | --model_metadata ${{inputs.model_metadata}} 20 | --model_name ${{inputs.model_name}} 21 | --score_report ${{inputs.score_report}} 22 | --build_reference ${{inputs.build_reference}} 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/score.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: score_model 3 | version: 1 4 | display_name: ScoreModel 5 | type: command 6 | inputs: 7 | predictions: 8 | type: uri_folder 9 | model: 10 | type: uri_folder 11 | outputs: 12 | score_report: 13 | type: uri_folder 14 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 15 | code: ./../../../ 16 | command: >- 17 | python -m src.london_src.score.score 18 | --predictions ${{inputs.predictions}} 19 | --model ${{inputs.model}} 20 | --score_report ${{outputs.score_report}} 21 | 22 | 23 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/train.yml: -------------------------------------------------------------------------------- 1 | $schema: 
https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: train_linear_regression_model 3 | display_name: TrainLinearRegressionModel 4 | version: 1 5 | type: command 6 | inputs: 7 | training_data: 8 | type: uri_folder 9 | outputs: 10 | model_output: 11 | type: uri_folder 12 | test_data: 13 | type: uri_folder 14 | model_metadata: 15 | type: uri_file 16 | code: ./../../../ 17 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 18 | command: >- 19 | python -m src.london_src.train.train 20 | --training_data ${{inputs.training_data}} 21 | --test_data ${{outputs.test_data}} 22 | --model_output ${{outputs.model_output}} 23 | --model_metadata ${{outputs.model_metadata}} 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlops/london_taxi/components/transform.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: taxi_feature_engineering 3 | display_name: TaxiFeatureEngineering 4 | version: 1 5 | type: command 6 | inputs: 7 | clean_data: 8 | type: uri_folder 9 | outputs: 10 | transformed_data: 11 | type: uri_folder 12 | code: ./../../../ 13 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 14 | command: >- 15 | python -m src.london_src.transform.transform 16 | --clean_data ${{inputs.clean_data}} 17 | --transformed_data ${{outputs.transformed_data}} 18 | 19 | -------------------------------------------------------------------------------- /mlops/london_taxi/environment/conda.yml: -------------------------------------------------------------------------------- 1 | name: prs-env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - pip 7 | - pip: 8 | - pandas 9 | - scikit-learn==1.2.0 10 | - mlflow>=2.7.1 11 | - azureml-mlflow>=1.51 12 | - mldesigner==0.1.0b4 13 | - azure-ai-ml==1.5.0 14 | - azure-identity==1.11.0 15 | - 
azure-keyvault-secrets==4.6.0 16 | -------------------------------------------------------------------------------- /mlops/london_taxi/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/mlops/london_taxi/src/__init__.py -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/predict.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: predict_taxi_fares 3 | version: 1 4 | display_name: PredictTaxiFares 5 | type: command 6 | inputs: 7 | model_input: 8 | type: mlflow_model 9 | test_data: 10 | type: uri_folder 11 | outputs: 12 | predictions: 13 | type: uri_folder 14 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 15 | code: ./../../../ 16 | command: >- 17 | python -m src.nyc_src.predict.predict 18 | --model_input ${{inputs.model_input}} 19 | --test_data ${{inputs.test_data}} 20 | --predictions ${{outputs.predictions}} 21 | 22 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/prep.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: prepare_taxi_data 3 | display_name: PrepTaxiData 4 | version: 1 5 | type: command 6 | inputs: 7 | raw_data: 8 | type: uri_folder 9 | outputs: 10 | prep_data: 11 | type: uri_folder 12 | code: ./../../../ 13 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 14 | environment_variables: 15 | ritesh: modi 16 | command: >- 17 | python -m src.nyc_src.prep.prep 18 | --raw_data ${{inputs.raw_data}} 19 | --prep_data ${{outputs.prep_data}} 20 | 21 | 
-------------------------------------------------------------------------------- /mlops/nyc_taxi/components/register.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: register_taxi_model 3 | display_name: RegisterTaxiModel 4 | version: 1 5 | type: command 6 | inputs: 7 | model_metadata: 8 | type: uri_folder 9 | model_name: 10 | type: string 11 | score_report: 12 | type: uri_folder 13 | build_reference: 14 | type: string 15 | code: ./../../../ 16 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 17 | command: >- 18 | python -m src.nyc_src.register.register 19 | --model_metadata ${{inputs.model_metadata}} 20 | --model_name ${{inputs.model_name}} 21 | --score_report ${{inputs.score_report}} 22 | --build_reference ${{inputs.build_reference}} 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/score.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: score_model 3 | version: 1 4 | display_name: ScoreModel 5 | type: command 6 | inputs: 7 | predictions: 8 | type: uri_folder 9 | model: 10 | type: uri_folder 11 | outputs: 12 | score_report: 13 | type: uri_folder 14 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 15 | code: ./../../../ 16 | command: >- 17 | python -m src.nyc_src.score.score 18 | --predictions ${{inputs.predictions}} 19 | --model ${{inputs.model}} 20 | --score_report ${{outputs.score_report}} 21 | 22 | 23 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/train.yml: -------------------------------------------------------------------------------- 1 | $schema: 
https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: train_linear_regression_model 3 | display_name: TrainLinearRegressionModel 4 | version: 1 5 | type: command 6 | inputs: 7 | training_data: 8 | type: uri_folder 9 | outputs: 10 | model_output: 11 | type: uri_folder 12 | test_data: 13 | type: uri_folder 14 | model_metadata: 15 | type: uri_file 16 | code: ./../../../ 17 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 18 | command: >- 19 | python -m src.nyc_src.train.train 20 | --training_data ${{inputs.training_data}} 21 | --test_data ${{outputs.test_data}} 22 | --model_output ${{outputs.model_output}} 23 | --model_metadata ${{outputs.model_metadata}} 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/components/transform.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: taxi_feature_engineering 3 | display_name: TaxiFeatureEngineering 4 | version: 1 5 | type: command 6 | inputs: 7 | clean_data: 8 | type: uri_folder 9 | outputs: 10 | transformed_data: 11 | type: uri_folder 12 | code: ./../../../ 13 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 14 | command: >- 15 | python -m src.nyc_src.transform.transform 16 | --clean_data ${{inputs.clean_data}} 17 | --transformed_data ${{outputs.transformed_data}} 18 | 19 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/environment/conda.yml: -------------------------------------------------------------------------------- 1 | name: prs-env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - pip 7 | - pip: 8 | - pandas 9 | - scikit-learn==1.2.0 10 | - mlflow>=2.7.1 11 | - azureml-mlflow>=1.51 12 | - mldesigner==0.1.0b4 13 | - azure-ai-ml==1.5.0 14 | - azure-identity==1.11.0 15 | - 
azure-keyvault-secrets==4.6.0 16 | -------------------------------------------------------------------------------- /mlops/nyc_taxi/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/mlops/nyc_taxi/src/__init__.py -------------------------------------------------------------------------------- /model/london_taxi/dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cuda11.6.2-gpu-inference:latest 2 | 3 | ARG model_folder 4 | ARG model_registered_name 5 | ARG model_file_name 6 | ARG score_file 7 | 8 | ENV MODEL_FILE_NAME=$model_file_name 9 | ENV MODEL_LOG_PATH=/var/azureml-app/logs/ 10 | ENV MODEL_NAME=$model_registered_name 11 | 12 | COPY environment/requirements.txt ./requirements.txt 13 | 14 | RUN pip install -r ./requirements.txt 15 | 16 | RUN mkdir -p /var/azureml-app/azureml-models 17 | RUN mkdir -p /var/azureml-app/logs/ 18 | # score file 19 | COPY scoring/$score_file /var/azureml-app/$score_file 20 | ENV AZUREML_ENTRY_SCRIPT=$score_file 21 | 22 | # Model 23 | COPY $model_registered_name/$model_file_name /var/azureml-app/azureml-models/$model_file_name 24 | ENV AZUREML_MODEL_DIR=/var/azureml-app/azureml-models 25 | 26 | CMD ["runsvdir","/var/runit"] -------------------------------------------------------------------------------- /model/london_taxi/environment/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.2 2 | pip==21.2.4 3 | scikit-learn==0.24.2 4 | scipy==1.7.1 5 | azureml-defaults==1.38.0 6 | joblib==1.0.1 -------------------------------------------------------------------------------- /model/london_taxi/pipeline-requirements.txt: -------------------------------------------------------------------------------- 1 | azure-cli==2.41 
-------------------------------------------------------------------------------- /model/london_taxi/sample-request.json: -------------------------------------------------------------------------------- 1 | {"data": [ 2 | [0.9,40.73394012451172,-74.00725555419922,1,40.73118209838867,-74.00128173828125,0,1,6,1,10,11,50,31,6,1,10,11,55,51], 3 | [4.7,40.71599197387695,-73.99481964111328,1,40.71211624145508,-73.9439697265625,0,1,3,1,7,23,2,35,3,1,7,23,16,39], 4 | [1.22,40.69337844848633,-73.97087860107422,1,40.692501068115234,-73.98727416992188,0,2,5,1,30,21,4,7,5,1,30,21,9,9] 5 | ]} -------------------------------------------------------------------------------- /model/london_taxi/scoring/score.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy 4 | import joblib 5 | import csv 6 | import datetime 7 | from mlops.common.logger import get_logger 8 | 9 | logger = get_logger("london_taxi_score") 10 | 11 | def init(): 12 | """ 13 | This function is called when the container is initialized/started, typically after create/update of the deployment. 14 | You can write the logic here to perform init operations like caching the model in memory 15 | """ 16 | global model 17 | 18 | 19 | 20 | model_path = os.path.join( 21 | os.getenv("AZUREML_MODEL_DIR"), os.environ["MODEL_FILE_NAME"] 22 | ) 23 | # deserialize the model file back into a sklearn model 24 | model = joblib.load(model_path) 25 | logger.info("Init complete") 26 | 27 | 28 | def run(raw_data): 29 | """ 30 | This function is called for every invocation of the endpoint to perform the actual scoring/prediction. 
31 | In the example we extract the data from the json input and call the scikit-learn model's predict() 32 | method and return the result back 33 | """ 34 | current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 35 | folder_path = f"{os.environ['MODEL_LOG_PATH']}{os.environ['MODEL_NAME']}/{current_time}" 36 | if not os.path.exists(folder_path): 37 | os.makedirs(folder_path) 38 | csv_input_path = f"{folder_path}/input.csv" 39 | csv_output_path = f"{folder_path}/output.csv" 40 | logger.info("model 1: request received") 41 | data = json.loads(raw_data)["data"] 42 | data = numpy.array(data) 43 | numpy.savetxt(csv_input_path, data, delimiter=",") 44 | 45 | result = model.predict(data) 46 | 47 | numpy.savetxt(csv_output_path, result, delimiter=",") 48 | logger.info("Request processed") 49 | return result.tolist() 50 | -------------------------------------------------------------------------------- /model/nyc_taxi/dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cuda11.6.2-gpu-inference:latest 2 | 3 | ARG model_folder 4 | ARG model_registered_name 5 | ARG model_file_name 6 | ARG score_file 7 | 8 | ENV MODEL_FILE_NAME=$model_file_name 9 | ENV MODEL_LOG_PATH=/var/azureml-app/logs/ 10 | ENV MODEL_NAME=$model_registered_name 11 | 12 | COPY environment/requirements.txt ./requirements.txt 13 | 14 | RUN pip install -r ./requirements.txt 15 | 16 | RUN mkdir -p /var/azureml-app/azureml-models 17 | RUN mkdir -p /var/azureml-app/logs/ 18 | # score file 19 | COPY scoring/$score_file /var/azureml-app/$score_file 20 | ENV AZUREML_ENTRY_SCRIPT=$score_file 21 | 22 | # Model 23 | COPY $model_registered_name/$model_file_name /var/azureml-app/azureml-models/$model_file_name 24 | ENV AZUREML_MODEL_DIR=/var/azureml-app/azureml-models 25 | 26 | CMD ["runsvdir","/var/runit"] -------------------------------------------------------------------------------- 
/model/nyc_taxi/environment/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.2 2 | pip==21.2.4 3 | scikit-learn==0.24.2 4 | scipy==1.7.1 5 | azureml-defaults==1.38.0 6 | joblib==1.0.1 -------------------------------------------------------------------------------- /model/nyc_taxi/pipeline-requirements.txt: -------------------------------------------------------------------------------- 1 | azure-cli==2.41 -------------------------------------------------------------------------------- /model/nyc_taxi/sample-request.json: -------------------------------------------------------------------------------- 1 | {"data": [ 2 | [0.9,40.73394012451172,-74.00725555419922,1,40.73118209838867,-74.00128173828125,0,1,6,1,10,11,50,31,6,1,10,11,55,51], 3 | [4.7,40.71599197387695,-73.99481964111328,1,40.71211624145508,-73.9439697265625,0,1,3,1,7,23,2,35,3,1,7,23,16,39], 4 | [1.22,40.69337844848633,-73.97087860107422,1,40.692501068115234,-73.98727416992188,0,2,5,1,30,21,4,7,5,1,30,21,9,9] 5 | ]} -------------------------------------------------------------------------------- /model/nyc_taxi/scoring/score.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | import numpy 5 | import joblib 6 | import csv 7 | import datetime 8 | from mlops.common.logger import get_logger 9 | 10 | logger = get_logger("nyc_taxi_score") 11 | 12 | def init(): 13 | """ 14 | This function is called when the container is initialized/started, typically after create/update of the deployment. 
15 | You can write the logic here to perform init operations like caching the model in memory 16 | """ 17 | global model 18 | 19 | 20 | 21 | model_path = os.path.join( 22 | os.getenv("AZUREML_MODEL_DIR"), os.environ["MODEL_FILE_NAME"] 23 | ) 24 | # deserialize the model file back into a sklearn model 25 | model = joblib.load(model_path) 26 | logger.info("Init complete") 27 | 28 | 29 | def run(raw_data): 30 | """ 31 | This function is called for every invocation of the endpoint to perform the actual scoring/prediction. 32 | In the example we extract the data from the json input and call the scikit-learn model's predict() 33 | method and return the result back 34 | """ 35 | current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 36 | folder_path = f"{os.environ['MODEL_LOG_PATH']}{os.environ['MODEL_NAME']}/{current_time}" 37 | if not os.path.exists(folder_path): 38 | os.makedirs(folder_path) 39 | csv_input_path = f"{folder_path}/input.csv" 40 | csv_output_path = f"{folder_path}/output.csv" 41 | logger.info("model 1: request received") 42 | data = json.loads(raw_data)["data"] 43 | data = numpy.array(data) 44 | numpy.savetxt(csv_input_path, data, delimiter=",") 45 | 46 | result = model.predict(data) 47 | 48 | numpy.savetxt(csv_output_path, result, delimiter=",") 49 | logger.info("Request processed") 50 | return result.tolist() 51 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/__init__.py -------------------------------------------------------------------------------- /src/london_src/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/__init__.py -------------------------------------------------------------------------------- /src/london_src/predict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/predict/__init__.py -------------------------------------------------------------------------------- /src/london_src/predict/predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import os 4 | from pathlib import Path 5 | from sklearn.linear_model import LinearRegression 6 | import pickle 7 | from mlops.common.logger import get_logger 8 | 9 | logger = get_logger("london_taxi_predict") 10 | 11 | def main(model_input, test_data, prediction_path): 12 | lines = [ 13 | f"Model path: {model_input}", 14 | f"Test data path: {test_data}", 15 | f"Predictions path: {prediction_path}", 16 | ] 17 | 18 | for line in lines: 19 | logger.info(line) 20 | 21 | testX, testy = load_test_data(test_data) 22 | predict(testX, testy, model_input, prediction_path) 23 | 24 | 25 | # Load and split the test data 26 | def load_test_data(test_data): 27 | logger.info("mounted_path files: ") 28 | arr = os.listdir(test_data) 29 | 30 | logger.info(arr) 31 | df_list = [] 32 | for filename in arr: 33 | logger.info("reading file: %s ..." 
% filename) 34 | with open(os.path.join(test_data, filename), "r") as handle: 35 | input_df = pd.read_csv((Path(test_data) / filename)) 36 | df_list.append(input_df) 37 | 38 | test_data = df_list[0] 39 | testy = test_data["cost"] 40 | testX = test_data[ 41 | [ 42 | "distance", 43 | "dropoff_latitude", 44 | "dropoff_longitude", 45 | "passengers", 46 | "pickup_latitude", 47 | "pickup_longitude", 48 | "store_forward", 49 | "vendor", 50 | "pickup_weekday", 51 | "pickup_month", 52 | "pickup_monthday", 53 | "pickup_hour", 54 | "pickup_minute", 55 | "pickup_second", 56 | "dropoff_weekday", 57 | "dropoff_month", 58 | "dropoff_monthday", 59 | "dropoff_hour", 60 | "dropoff_minute", 61 | "dropoff_second", 62 | ] 63 | ] 64 | logger.info(testX.shape) 65 | logger.info(testX.columns) 66 | return testX, testy 67 | 68 | 69 | def predict(testX, testy, model_input, prediction_path): 70 | # Load the model from input port 71 | model = pickle.load(open((Path(model_input) / "model.sav"), "rb")) 72 | 73 | # Make predictions on testX data and record them in a column named predicted_cost 74 | predictions = model.predict(testX) 75 | testX["predicted_cost"] = predictions 76 | logger.info(testX.shape) 77 | 78 | # Compare predictions to actuals (testy) 79 | output_data = pd.DataFrame(testX) 80 | output_data["actual_cost"] = testy 81 | 82 | # Save the output data with feature columns, predicted cost, and actual cost in csv file 83 | output_data = output_data.to_csv((Path(prediction_path) / "predictions.csv")) 84 | 85 | 86 | if __name__ == "__main__": 87 | parser = argparse.ArgumentParser("predict") 88 | parser.add_argument("--model_input", type=str, help="Path of input model") 89 | parser.add_argument("--test_data", type=str, help="Path to test data") 90 | parser.add_argument("--predictions", type=str, help="Path of predictions") 91 | 92 | args = parser.parse_args() 93 | 94 | logger.info("hello scoring world...") 95 | 96 | model_input = args.model_input 97 | test_data = args.test_data 98 | 
prediction_path = args.predictions 99 | main(model_input, test_data, prediction_path) 100 | -------------------------------------------------------------------------------- /src/london_src/prep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/prep/__init__.py -------------------------------------------------------------------------------- /src/london_src/prep/prep.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from typing_extensions import Concatenate 4 | from uuid import uuid4 5 | from datetime import datetime 6 | import os 7 | import pandas as pd 8 | from sklearn.linear_model import LinearRegression 9 | from sklearn.model_selection import train_test_split 10 | import pickle 11 | from mlops.common.logger import get_logger 12 | 13 | logger = get_logger("london_taxi_prep") 14 | 15 | def main(raw_data, prep_data): 16 | logger.info("hello training world...") 17 | 18 | lines = [ 19 | f"Raw data path: {raw_data}", 20 | f"Data output path: {prep_data}", 21 | ] 22 | 23 | for line in lines: 24 | logger.info(line) 25 | 26 | logger.info("mounted_path files: ") 27 | arr = os.listdir(raw_data) 28 | logger.info(arr) 29 | 30 | df_list = [] 31 | for filename in arr: 32 | logger.info("reading file: %s ..." 
% filename) 33 | with open(os.path.join(raw_data, filename), "r") as handle: 34 | input_df = pd.read_csv((Path(raw_data) / filename)) 35 | df_list.append(input_df) 36 | 37 | # Prep the green and yellow taxi data 38 | green_data = df_list[0] 39 | yellow_data = df_list[1] 40 | 41 | data_prep(green_data, yellow_data) 42 | 43 | 44 | def data_prep(green_data, yellow_data): 45 | # Define useful columns needed for the Azure Machine Learning NYC Taxi tutorial 46 | useful_columns = str( 47 | [ 48 | "cost", 49 | "distance", 50 | "dropoff_datetime", 51 | "dropoff_latitude", 52 | "dropoff_longitude", 53 | "passengers", 54 | "pickup_datetime", 55 | "pickup_latitude", 56 | "pickup_longitude", 57 | "store_forward", 58 | "vendor", 59 | ] 60 | ).replace(",", ";") 61 | logger.info(useful_columns) 62 | 63 | # Rename columns as per Azure Machine Learning NYC Taxi tutorial 64 | green_columns = str( 65 | { 66 | "vendorID": "vendor", 67 | "lpepPickupDatetime": "pickup_datetime", 68 | "lpepDropoffDatetime": "dropoff_datetime", 69 | "storeAndFwdFlag": "store_forward", 70 | "pickupLongitude": "pickup_longitude", 71 | "pickupLatitude": "pickup_latitude", 72 | "dropoffLongitude": "dropoff_longitude", 73 | "dropoffLatitude": "dropoff_latitude", 74 | "passengerCount": "passengers", 75 | "fareAmount": "cost", 76 | "tripDistance": "distance", 77 | } 78 | ).replace(",", ";") 79 | 80 | yellow_columns = str( 81 | { 82 | "vendorID": "vendor", 83 | "tpepPickupDateTime": "pickup_datetime", 84 | "tpepDropoffDateTime": "dropoff_datetime", 85 | "storeAndFwdFlag": "store_forward", 86 | "startLon": "pickup_longitude", 87 | "startLat": "pickup_latitude", 88 | "endLon": "dropoff_longitude", 89 | "endLat": "dropoff_latitude", 90 | "passengerCount": "passengers", 91 | "fareAmount": "cost", 92 | "tripDistance": "distance", 93 | } 94 | ).replace(",", ";") 95 | 96 | logger.info("green_columns: " + green_columns) 97 | logger.info("yellow_columns: " + yellow_columns) 98 | 99 | green_data_clean = 
cleanseData(green_data, green_columns, useful_columns) 100 | yellow_data_clean = cleanseData(yellow_data, yellow_columns, useful_columns) 101 | 102 | # Append yellow data to green data 103 | combined_df = pd.concat([green_data_clean,yellow_data_clean], ignore_index=True) 104 | combined_df.reset_index(inplace=True, drop=True) 105 | 106 | output_green = green_data_clean.to_csv( 107 | os.path.join(prep_data, "green_prep_data.csv") 108 | ) 109 | output_yellow = yellow_data_clean.to_csv( 110 | os.path.join(prep_data, "yellow_prep_data.csv") 111 | ) 112 | merged_data = combined_df.to_csv(os.path.join(prep_data, "merged_data.csv")) 113 | 114 | logger.info("Finish") 115 | 116 | 117 | # These functions ensure that null data is removed from the dataset, 118 | # which will help increase machine learning model accuracy. 119 | def get_dict(dict_str): 120 | pairs = dict_str.strip("{}").split(";") 121 | new_dict = {} 122 | for pair in pairs: 123 | logger.info(pair) 124 | key, value = pair.strip().split(":") 125 | new_dict[key.strip().strip("'")] = value.strip().strip("'") 126 | return new_dict 127 | 128 | 129 | def cleanseData(data, columns, useful_columns): 130 | useful_columns = [ 131 | s.strip().strip("'") for s in useful_columns.strip("[]").split(";") 132 | ] 133 | new_columns = get_dict(columns) 134 | 135 | new_df = (data.dropna(how="all").rename(columns=new_columns))[useful_columns] 136 | 137 | new_df.reset_index(inplace=True, drop=True) 138 | return new_df 139 | 140 | 141 | if __name__ == "__main__": 142 | parser = argparse.ArgumentParser() 143 | parser.add_argument( 144 | "--raw_data", 145 | type=str, 146 | default="../data/raw_data", 147 | help="Path to raw data", 148 | ) 149 | parser.add_argument( 150 | "--prep_data", type=str, default="../data/prep_data", help="Path to prep data" 151 | ) 152 | 153 | args = parser.parse_args() 154 | raw_data = args.raw_data 155 | prep_data = args.prep_data 156 | 157 | main(raw_data, prep_data) 158 | 
-------------------------------------------------------------------------------- /src/london_src/register/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/register/__init__.py -------------------------------------------------------------------------------- /src/london_src/register/register.py: -------------------------------------------------------------------------------- 1 | import mlflow 2 | import argparse 3 | import os 4 | import json 5 | from pathlib import Path 6 | from mlops.common.logger import get_logger 7 | 8 | logger = get_logger("london_taxi_register") 9 | 10 | def main(model_metadata, model_name, score_report, build_reference): 11 | try: 12 | run_file = open(args.model_metadata) 13 | model_metadata = json.load(run_file) 14 | run_uri = model_metadata["run_uri"] 15 | 16 | 17 | score_file = open(Path(args.score_report) / "score.txt") 18 | score_data = json.load(score_file) 19 | cod = score_data["cod"] 20 | mse = score_data["mse"] 21 | coff = score_data["coff"] 22 | 23 | model_version = mlflow.register_model(run_uri, model_name) 24 | 25 | client = mlflow.MlflowClient() 26 | client.set_model_version_tag( 27 | name=model_name, version=model_version.version, key="mse", value=mse 28 | ) 29 | client.set_model_version_tag( 30 | name=model_name, version=model_version.version, key="coff", value=coff 31 | ) 32 | client.set_model_version_tag( 33 | name=model_name, version=model_version.version, key="cod", value=cod 34 | ) 35 | client.set_model_version_tag( 36 | name=model_name, 37 | version=model_version.version, 38 | key="build_id", 39 | value=build_reference, 40 | ) 41 | 42 | logger.info(model_version) 43 | except Exception as ex: 44 | logger.exception("Exception in register model") 45 | raise 46 | finally: 47 | run_file.close() 48 | 49 | 50 | if __name__ == "__main__": 51 | parser = 
argparse.ArgumentParser("register_model") 52 | parser.add_argument( 53 | "--model_metadata", 54 | type=str, 55 | help="model metadata on Machine Learning Workspace", 56 | ) 57 | parser.add_argument("--model_name", type=str, help="model name to be registered") 58 | parser.add_argument("--score_report", type=str, help="score report for the model") 59 | parser.add_argument( 60 | "--build_reference", 61 | type=str, 62 | help="Original AzDo build id that initiated experiment", 63 | ) 64 | 65 | args = parser.parse_args() 66 | 67 | logger.info(args.model_metadata) 68 | logger.info(args.model_name) 69 | logger.info(args.score_report) 70 | logger.info(args.build_reference) 71 | 72 | main(args.model_metadata, args.model_name, args.score_report, args.build_reference) 73 | -------------------------------------------------------------------------------- /src/london_src/score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/score/__init__.py -------------------------------------------------------------------------------- /src/london_src/score/score.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import os 4 | from pathlib import Path 5 | from sklearn.linear_model import LinearRegression 6 | import pickle 7 | from sklearn.metrics import mean_squared_error, r2_score 8 | import mlflow 9 | import json 10 | from mlops.common.logger import get_logger 11 | 12 | logger = get_logger("london_taxi_score") 13 | 14 | def main(predictions, model, score_report): 15 | logger.info("hello scoring world...") 16 | 17 | 18 | lines = [ 19 | f"Model path: {model}", 20 | f"Predictions path: {predictions}", 21 | f"Scoring output path: {score_report}", 22 | ] 23 | 24 | for line in lines: 25 | logger.info(line) 26 | 27 | # Load the test data with 
predicted values 28 | 29 | logger.info("mounted_path files: ") 30 | arr = os.listdir(predictions) 31 | 32 | logger.info(arr) 33 | df_list = [] 34 | for filename in arr: 35 | logger.info("reading file: %s ..." % filename) 36 | with open(os.path.join(predictions, filename), "r") as handle: 37 | input_df = pd.read_csv((Path(predictions) / filename)) 38 | df_list.append(input_df) 39 | 40 | test_data = df_list[0] 41 | 42 | # Load the model from input port 43 | model = pickle.load(open((Path(model) / "model.sav"), "rb")) 44 | write_results(model, predictions, test_data, score_report) 45 | 46 | 47 | # Print the results of scoring the predictions against actual values in the test data 48 | def write_results(model, predictions, test_data, score_report): 49 | # The coefficients 50 | logger.info("Coefficients: \n", model.coef_) 51 | 52 | actuals = test_data["actual_cost"] 53 | predictions = test_data["predicted_cost"] 54 | 55 | mse = mean_squared_error(actuals, predictions) 56 | r2 = r2_score(actuals, predictions) 57 | 58 | mlflow.log_metric("scoring_mse", mse) 59 | mlflow.log_metric("scoring_r2", r2) 60 | 61 | # The mean squared error 62 | logger.info("Mean squared error: %.2f" % mse) 63 | # The coefficient of determination: 1 is perfect prediction 64 | logger.info("Coefficient of determination: %.2f" % r2) 65 | logger.info("Model: ", model) 66 | 67 | # logger.info score report to a text file 68 | model_score = { 69 | "mse": mean_squared_error(actuals, predictions), 70 | "coff": str(model.coef_), 71 | "cod": r2_score(actuals, predictions), 72 | } 73 | with open((Path(score_report) / "score.txt"), "w") as json_file: 74 | json.dump(model_score, json_file, indent=4) 75 | 76 | 77 | if __name__ == "__main__": 78 | parser = argparse.ArgumentParser("score") 79 | parser.add_argument( 80 | "--predictions", type=str, help="Path of predictions and actual data" 81 | ) 82 | parser.add_argument("--model", type=str, help="Path to model") 83 | parser.add_argument("--score_report", type=str, 
help="Path to score report") 84 | 85 | args = parser.parse_args() 86 | 87 | predictions = args.predictions 88 | model = args.model 89 | score_report = args.score_report 90 | 91 | main(predictions, model, score_report) 92 | -------------------------------------------------------------------------------- /src/london_src/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/train/__init__.py -------------------------------------------------------------------------------- /src/london_src/train/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | import os 4 | import pandas as pd 5 | from sklearn.linear_model import LinearRegression 6 | from sklearn.model_selection import train_test_split 7 | import pickle 8 | import mlflow 9 | import json 10 | from mlops.common.logger import get_logger 11 | 12 | logger = get_logger("london_taxi_train") 13 | 14 | 15 | def main(training_data, test_data, model_output, model_metadata): 16 | logger.info("Hello training world...") 17 | 18 | 19 | lines = [ 20 | f"Training data path: {training_data}", 21 | f"Test data path: {test_data}", 22 | f"Model output path: {model_output}", 23 | f"Model metadata path: {model_metadata}", 24 | ] 25 | 26 | for line in lines: 27 | logger.info(line) 28 | 29 | logger.info("mounted_path files: ") 30 | arr = os.listdir(training_data) 31 | logger.info(arr) 32 | 33 | df_list = [] 34 | for filename in arr: 35 | logger.info("reading file: %s ..." 
% filename) 36 | with open(os.path.join(training_data, filename), "r") as handle: 37 | input_df = pd.read_csv((Path(training_data) / filename)) 38 | df_list.append(input_df) 39 | 40 | train_data = df_list[0] 41 | logger.info(train_data.columns) 42 | 43 | trainX, testX, trainy, testy = split(train_data) 44 | write_test_data(testX, testy) 45 | train_model(trainX, trainy) 46 | 47 | 48 | def split(train_data): 49 | # Split the data into input(X) and output(y) 50 | y = train_data["cost"] 51 | X = train_data[ 52 | [ 53 | "distance", 54 | "dropoff_latitude", 55 | "dropoff_longitude", 56 | "passengers", 57 | "pickup_latitude", 58 | "pickup_longitude", 59 | "store_forward", 60 | "vendor", 61 | "pickup_weekday", 62 | "pickup_month", 63 | "pickup_monthday", 64 | "pickup_hour", 65 | "pickup_minute", 66 | "pickup_second", 67 | "dropoff_weekday", 68 | "dropoff_month", 69 | "dropoff_monthday", 70 | "dropoff_hour", 71 | "dropoff_minute", 72 | "dropoff_second", 73 | ] 74 | ] 75 | 76 | # Split the data into train and test sets 77 | trainX, testX, trainy, testy = train_test_split( 78 | X, y, test_size=0.3, random_state=42 79 | ) 80 | logger.info(trainX.shape) 81 | logger.info(trainX.columns) 82 | 83 | return trainX, testX, trainy, testy 84 | 85 | 86 | def train_model(trainX, trainy): 87 | mlflow.autolog() 88 | # Train a Linear Regression Model with the train set 89 | with mlflow.start_run() as run: 90 | model = LinearRegression().fit(trainX, trainy) 91 | logger.info(model.score(trainX, trainy)) 92 | 93 | # Output the model, metadata and test data 94 | run_id = mlflow.active_run().info.run_id 95 | model_uri = f"runs:/{run_id}/model" 96 | model_data = {"run_id": run.info.run_id, "run_uri": model_uri} 97 | with open(args.model_metadata, "w") as json_file: 98 | json.dump(model_data, json_file, indent=4) 99 | 100 | pickle.dump(model, open((Path(args.model_output) / "model.sav"), "wb")) 101 | 102 | 103 | def write_test_data(testX, testy): 104 | testX["cost"] = testy 105 | 
logger.info(testX.shape) 106 | testX.to_csv((Path(args.test_data) / "test_data.csv")) 107 | 108 | 109 | if __name__ == "__main__": 110 | parser = argparse.ArgumentParser("train") 111 | parser.add_argument("--training_data", type=str, help="Path to training data") 112 | parser.add_argument("--test_data", type=str, help="Path to test data") 113 | parser.add_argument("--model_output", type=str, help="Path of output model") 114 | parser.add_argument("--model_metadata", type=str, help="Path of model metadata") 115 | 116 | args = parser.parse_args() 117 | 118 | training_data = args.training_data 119 | test_data = args.test_data 120 | model_output = args.model_output 121 | model_metadata = args.model_metadata 122 | 123 | main(training_data, test_data, model_output, model_metadata) 124 | -------------------------------------------------------------------------------- /src/london_src/transform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/london_src/transform/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/predict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/predict/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/predict/predict.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import os 4 | from pathlib import Path 5 | from sklearn.linear_model import LinearRegression 6 | import pickle 7 | from mlops.common.logger import get_logger 8 | 9 | logger = get_logger("myc_taxi_predict") 10 | 11 | def main(model_input, test_data, prediction_path): 12 | lines = [ 13 | f"Model path: {model_input}", 14 | f"Test data path: {test_data}", 15 | f"Predictions path: {prediction_path}", 16 | ] 17 | 18 | for line in lines: 19 | logger.info(line) 20 | 21 | testX, testy = load_test_data(test_data) 22 | predict(testX, testy, model_input, prediction_path) 23 | 24 | 25 | # Load and split the test data 26 | def load_test_data(test_data): 27 | logger.info("mounted_path files: ") 28 | arr = os.listdir(test_data) 29 | 30 | logger.info(arr) 31 | df_list = [] 32 | for filename in arr: 33 | logger.info("reading file: %s ..." % filename) 34 | with open(os.path.join(test_data, filename), "r") as handle: 35 | input_df = pd.read_csv((Path(test_data) / filename)) 36 | df_list.append(input_df) 37 | 38 | test_data = df_list[0] 39 | testy = test_data["cost"] 40 | testX = test_data[ 41 | [ 42 | "distance", 43 | "dropoff_latitude", 44 | "dropoff_longitude", 45 | "passengers", 46 | "pickup_latitude", 47 | "pickup_longitude", 48 | "store_forward", 49 | "vendor", 50 | "pickup_weekday", 51 | "pickup_month", 52 | "pickup_monthday", 53 | "pickup_hour", 54 | "pickup_minute", 55 | "pickup_second", 56 | "dropoff_weekday", 57 | "dropoff_month", 58 | "dropoff_monthday", 59 | "dropoff_hour", 60 | "dropoff_minute", 61 | "dropoff_second", 62 | ] 63 | ] 64 | logger.info(testX.shape) 65 | logger.info(testX.columns) 66 | return testX, testy 67 | 68 | 69 | def predict(testX, testy, model_input, prediction_path): 70 | # Load the model from input port 71 | model = pickle.load(open((Path(model_input) / "model.sav"), "rb")) 72 | 73 | # Make predictions on testX data and 
record them in a column named predicted_cost 74 | predictions = model.predict(testX) 75 | testX["predicted_cost"] = predictions 76 | logger.info(testX.shape) 77 | 78 | # Compare predictions to actuals (testy) 79 | output_data = pd.DataFrame(testX) 80 | output_data["actual_cost"] = testy 81 | 82 | # Save the output data with feature columns, predicted cost, and actual cost in csv file 83 | output_data = output_data.to_csv((Path(prediction_path) / "predictions.csv")) 84 | 85 | if __name__ == "__main__": 86 | parser = argparse.ArgumentParser("predict") 87 | parser.add_argument("--model_input", type=str, help="Path of input model") 88 | parser.add_argument("--test_data", type=str, help="Path to test data") 89 | parser.add_argument("--predictions", type=str, help="Path of predictions") 90 | 91 | args = parser.parse_args() 92 | 93 | logger.info("hello scoring world...") 94 | 95 | model_input = args.model_input 96 | test_data = args.test_data 97 | prediction_path = args.predictions 98 | main(model_input, test_data, prediction_path) 99 | -------------------------------------------------------------------------------- /src/nyc_src/prep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/prep/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/prep/prep.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from typing_extensions import Concatenate 4 | from uuid import uuid4 5 | from datetime import datetime 6 | import os 7 | import pandas as pd 8 | from sklearn.linear_model import LinearRegression 9 | from sklearn.model_selection import train_test_split 10 | import pickle 11 | from mlops.common.logger import get_logger 12 | 13 | logger = get_logger("nyc_taxi_prep") 14 | 
15 | 16 | def main(raw_data, prep_data): 17 | logger.info("hello training world...") 18 | 19 | lines = [ 20 | f"Raw data path: {raw_data}", 21 | f"Data output path: {prep_data}", 22 | ] 23 | 24 | for line in lines: 25 | logger.info(line) 26 | 27 | logger.info("mounted_path files: ") 28 | arr = os.listdir(raw_data) 29 | logger.info(arr) 30 | 31 | df_list = [] 32 | for filename in arr: 33 | logger.info("reading file: %s ..." % filename) 34 | with open(os.path.join(raw_data, filename), "r") as handle: 35 | input_df = pd.read_csv((Path(raw_data) / filename)) 36 | df_list.append(input_df) 37 | 38 | # Prep the green and yellow taxi data 39 | green_data = df_list[0] 40 | yellow_data = df_list[1] 41 | 42 | data_prep(green_data, yellow_data) 43 | 44 | 45 | def data_prep(green_data, yellow_data): 46 | # Define useful columns needed for the Azure Machine Learning NYC Taxi tutorial 47 | 48 | useful_columns = str( 49 | [ 50 | "cost", 51 | "distance", 52 | "dropoff_datetime", 53 | "dropoff_latitude", 54 | "dropoff_longitude", 55 | "passengers", 56 | "pickup_datetime", 57 | "pickup_latitude", 58 | "pickup_longitude", 59 | "store_forward", 60 | "vendor", 61 | ] 62 | ).replace(",", ";") 63 | logger.info(useful_columns) 64 | 65 | # Rename columns as per Azure Machine Learning NYC Taxi tutorial 66 | green_columns = str( 67 | { 68 | "vendorID": "vendor", 69 | "lpepPickupDatetime": "pickup_datetime", 70 | "lpepDropoffDatetime": "dropoff_datetime", 71 | "storeAndFwdFlag": "store_forward", 72 | "pickupLongitude": "pickup_longitude", 73 | "pickupLatitude": "pickup_latitude", 74 | "dropoffLongitude": "dropoff_longitude", 75 | "dropoffLatitude": "dropoff_latitude", 76 | "passengerCount": "passengers", 77 | "fareAmount": "cost", 78 | "tripDistance": "distance", 79 | } 80 | ).replace(",", ";") 81 | 82 | yellow_columns = str( 83 | { 84 | "vendorID": "vendor", 85 | "tpepPickupDateTime": "pickup_datetime", 86 | "tpepDropoffDateTime": "dropoff_datetime", 87 | "storeAndFwdFlag": "store_forward", 
88 | "startLon": "pickup_longitude", 89 | "startLat": "pickup_latitude", 90 | "endLon": "dropoff_longitude", 91 | "endLat": "dropoff_latitude", 92 | "passengerCount": "passengers", 93 | "fareAmount": "cost", 94 | "tripDistance": "distance", 95 | } 96 | ).replace(",", ";") 97 | 98 | logger.info("green_columns: " + green_columns) 99 | logger.info("yellow_columns: " + yellow_columns) 100 | 101 | green_data_clean = cleanseData(green_data, green_columns, useful_columns) 102 | yellow_data_clean = cleanseData(yellow_data, yellow_columns, useful_columns) 103 | 104 | # Append yellow data to green data 105 | combined_df = pd.concat([green_data_clean,yellow_data_clean], ignore_index=True) 106 | combined_df.reset_index(inplace=True, drop=True) 107 | 108 | output_green = green_data_clean.to_csv( 109 | os.path.join(prep_data, "green_prep_data.csv") 110 | ) 111 | output_yellow = yellow_data_clean.to_csv( 112 | os.path.join(prep_data, "yellow_prep_data.csv") 113 | ) 114 | merged_data = combined_df.to_csv(os.path.join(prep_data, "merged_data.csv")) 115 | 116 | logger.info("Finish") 117 | 118 | 119 | # These functions ensure that null data is removed from the dataset, 120 | # which will help increase machine learning model accuracy. 
121 | 122 | 123 | def get_dict(dict_str): 124 | pairs = dict_str.strip("{}").split(";") 125 | new_dict = {} 126 | for pair in pairs: 127 | logger.info(pair) 128 | key, value = pair.strip().split(":") 129 | new_dict[key.strip().strip("'")] = value.strip().strip("'") 130 | return new_dict 131 | 132 | 133 | def cleanseData(data, columns, useful_columns): 134 | useful_columns = [ 135 | s.strip().strip("'") for s in useful_columns.strip("[]").split(";") 136 | ] 137 | new_columns = get_dict(columns) 138 | 139 | new_df = (data.dropna(how="all").rename(columns=new_columns))[useful_columns] 140 | 141 | new_df.reset_index(inplace=True, drop=True) 142 | return new_df 143 | 144 | 145 | if __name__ == "__main__": 146 | parser = argparse.ArgumentParser() 147 | parser.add_argument( 148 | "--raw_data", 149 | type=str, 150 | default="../data/raw_data", 151 | help="Path to raw data", 152 | ) 153 | parser.add_argument( 154 | "--prep_data", type=str, default="../data/prep_data", help="Path to prep data" 155 | ) 156 | 157 | args = parser.parse_args() 158 | raw_data = args.raw_data 159 | prep_data = args.prep_data 160 | 161 | main(raw_data, prep_data) 162 | -------------------------------------------------------------------------------- /src/nyc_src/register/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/register/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/register/register.py: -------------------------------------------------------------------------------- 1 | import mlflow 2 | import argparse 3 | import os 4 | import json 5 | from pathlib import Path 6 | 7 | from mlops.common.logger import get_logger 8 | 9 | logger = get_logger("nyc_taxi_register") 10 | 11 | def main(model_metadata, model_name, score_report, build_reference): 12 | try: 13 | run_file = 
open(args.model_metadata) 14 | model_metadata = json.load(run_file) 15 | run_uri = model_metadata["run_uri"] 16 | 17 | score_file = open(Path(args.score_report) / "score.txt") 18 | score_data = json.load(score_file) 19 | cod = score_data["cod"] 20 | mse = score_data["mse"] 21 | coff = score_data["coff"] 22 | 23 | model_version = mlflow.register_model(run_uri, model_name) 24 | 25 | client = mlflow.MlflowClient() 26 | client.set_model_version_tag( 27 | name=model_name, version=model_version.version, key="mse", value=mse 28 | ) 29 | client.set_model_version_tag( 30 | name=model_name, version=model_version.version, key="coff", value=coff 31 | ) 32 | client.set_model_version_tag( 33 | name=model_name, version=model_version.version, key="cod", value=cod 34 | ) 35 | client.set_model_version_tag( 36 | name=model_name, 37 | version=model_version.version, 38 | key="build_id", 39 | value=build_reference, 40 | ) 41 | 42 | logger.info(model_version) 43 | except Exception as ex: 44 | logger.info(ex) 45 | raise 46 | finally: 47 | run_file.close() 48 | 49 | 50 | if __name__ == "__main__": 51 | parser = argparse.ArgumentParser("register_model") 52 | parser.add_argument( 53 | "--model_metadata", 54 | type=str, 55 | help="model metadata on Machine Learning Workspace", 56 | ) 57 | parser.add_argument("--model_name", type=str, help="model name to be registered") 58 | parser.add_argument("--score_report", type=str, help="score report for the model") 59 | parser.add_argument( 60 | "--build_reference", 61 | type=str, 62 | help="Original AzDo build id that initiated experiment", 63 | ) 64 | 65 | args = parser.parse_args() 66 | 67 | logger.info(args.model_metadata) 68 | logger.info(args.model_name) 69 | logger.info(args.score_report) 70 | logger.info(args.build_reference) 71 | 72 | main(args.model_metadata, args.model_name, args.score_report, args.build_reference) 73 | -------------------------------------------------------------------------------- /src/nyc_src/score/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/score/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/score/score.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import os 4 | from pathlib import Path 5 | from sklearn.linear_model import LinearRegression 6 | import pickle 7 | from sklearn.metrics import mean_squared_error, r2_score 8 | import mlflow 9 | import json 10 | 11 | from mlops.common.logger import get_logger 12 | 13 | logger = get_logger("nyc_taxi_score") 14 | 15 | def main(predictions, model, score_report): 16 | logger.info("hello scoring world...") 17 | 18 | lines = [ 19 | f"Model path: {model}", 20 | f"Predictions path: {predictions}", 21 | f"Scoring output path: {score_report}", 22 | ] 23 | 24 | for line in lines: 25 | logger.info(line) 26 | 27 | # Load the test data with predicted values 28 | 29 | logger.info("mounted_path files: ") 30 | arr = os.listdir(predictions) 31 | 32 | logger.info(arr) 33 | df_list = [] 34 | for filename in arr: 35 | logger.info("reading file: %s ..." 
% filename) 36 | with open(os.path.join(predictions, filename), "r") as handle: 37 | input_df = pd.read_csv((Path(predictions) / filename)) 38 | df_list.append(input_df) 39 | 40 | test_data = df_list[0] 41 | 42 | # Load the model from input port 43 | model = pickle.load(open((Path(model) / "model.sav"), "rb")) 44 | write_results(model, predictions, test_data, score_report) 45 | 46 | 47 | # Print the results of scoring the predictions against actual values in the test data 48 | 49 | 50 | def write_results(model, predictions, test_data, score_report): 51 | # The coefficients 52 | logger.info("Coefficients: \n", model.coef_) 53 | 54 | actuals = test_data["actual_cost"] 55 | predictions = test_data["predicted_cost"] 56 | 57 | mse = mean_squared_error(actuals, predictions) 58 | r2 = r2_score(actuals, predictions) 59 | 60 | mlflow.log_metric("scoring_mse", mse) 61 | mlflow.log_metric("scoring_r2", r2) 62 | 63 | # The mean squared error 64 | logger.info("Mean squared error: %.2f" % mse) 65 | # The coefficient of determination: 1 is perfect prediction 66 | logger.info("Coefficient of determination: %.2f" % r2) 67 | logger.info("Model: ", model) 68 | 69 | # Print score report to a text file 70 | model_score = { 71 | "mse": mean_squared_error(actuals, predictions), 72 | "coff": str(model.coef_), 73 | "cod": r2_score(actuals, predictions), 74 | } 75 | with open((Path(score_report) / "score.txt"), "w") as json_file: 76 | json.dump(model_score, json_file, indent=4) 77 | 78 | 79 | if __name__ == "__main__": 80 | parser = argparse.ArgumentParser("score") 81 | parser.add_argument( 82 | "--predictions", type=str, help="Path of predictions and actual data" 83 | ) 84 | parser.add_argument("--model", type=str, help="Path to model") 85 | parser.add_argument("--score_report", type=str, help="Path to score report") 86 | 87 | args = parser.parse_args() 88 | 89 | predictions = args.predictions 90 | model = args.model 91 | score_report = args.score_report 92 | 93 | main(predictions, model, 
score_report) 94 | -------------------------------------------------------------------------------- /src/nyc_src/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/train/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/train/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | import os 4 | import pandas as pd 5 | from sklearn.linear_model import LinearRegression 6 | from sklearn.model_selection import train_test_split 7 | import pickle 8 | import mlflow 9 | import json 10 | from mlops.common.logger import get_logger 11 | 12 | logger = get_logger("nyc_taxi_train") 13 | 14 | def main(training_data, test_data, model_output, model_metadata): 15 | logger.info("Hello training world...") 16 | 17 | lines = [ 18 | f"Training data path: {training_data}", 19 | f"Test data path: {test_data}", 20 | f"Model output path: {model_output}", 21 | f"Model metadata path: {model_metadata}", 22 | ] 23 | 24 | for line in lines: 25 | logger.info(line) 26 | 27 | logger.info("mounted_path files: ") 28 | arr = os.listdir(training_data) 29 | logger.info(arr) 30 | 31 | df_list = [] 32 | for filename in arr: 33 | logger.info("reading file: %s ..." 
% filename) 34 | with open(os.path.join(training_data, filename), "r") as handle: 35 | input_df = pd.read_csv((Path(training_data) / filename)) 36 | df_list.append(input_df) 37 | 38 | train_data = df_list[0] 39 | logger.info(train_data.columns) 40 | 41 | trainX, testX, trainy, testy = split(train_data) 42 | write_test_data(testX, testy) 43 | train_model(trainX, trainy) 44 | 45 | 46 | def split(train_data): 47 | # Split the data into input(X) and output(y) 48 | y = train_data["cost"] 49 | X = train_data[ 50 | [ 51 | "distance", 52 | "dropoff_latitude", 53 | "dropoff_longitude", 54 | "passengers", 55 | "pickup_latitude", 56 | "pickup_longitude", 57 | "store_forward", 58 | "vendor", 59 | "pickup_weekday", 60 | "pickup_month", 61 | "pickup_monthday", 62 | "pickup_hour", 63 | "pickup_minute", 64 | "pickup_second", 65 | "dropoff_weekday", 66 | "dropoff_month", 67 | "dropoff_monthday", 68 | "dropoff_hour", 69 | "dropoff_minute", 70 | "dropoff_second", 71 | ] 72 | ] 73 | 74 | # Split the data into train and test sets 75 | trainX, testX, trainy, testy = train_test_split( 76 | X, y, test_size=0.3, random_state=42 77 | ) 78 | logger.info(trainX.shape) 79 | logger.info(trainX.columns) 80 | 81 | return trainX, testX, trainy, testy 82 | 83 | 84 | def train_model(trainX, trainy): 85 | mlflow.autolog() 86 | # Train a Linear Regression Model with the train set 87 | with mlflow.start_run() as run: 88 | model = LinearRegression().fit(trainX, trainy) 89 | logger.info(model.score(trainX, trainy)) 90 | 91 | # Output the model, metadata and test data 92 | run_id = mlflow.active_run().info.run_id 93 | model_uri = f"runs:/{run_id}/model" 94 | model_data = {"run_id": run.info.run_id, "run_uri": model_uri} 95 | with open(args.model_metadata, "w") as json_file: 96 | json.dump(model_data, json_file, indent=4) 97 | 98 | pickle.dump(model, open((Path(args.model_output) / "model.sav"), "wb")) 99 | 100 | 101 | def write_test_data(testX, testy): 102 | testX["cost"] = testy 103 | 
logger.info(testX.shape) 104 | testX.to_csv((Path(args.test_data) / "test_data.csv")) 105 | 106 | 107 | if __name__ == "__main__": 108 | parser = argparse.ArgumentParser("train") 109 | parser.add_argument("--training_data", type=str, help="Path to training data") 110 | parser.add_argument("--test_data", type=str, help="Path to test data") 111 | parser.add_argument("--model_output", type=str, help="Path of output model") 112 | parser.add_argument("--model_metadata", type=str, help="Path of model metadata") 113 | 114 | args = parser.parse_args() 115 | 116 | training_data = args.training_data 117 | test_data = args.test_data 118 | model_output = args.model_output 119 | model_metadata = args.model_metadata 120 | 121 | main(training_data, test_data, model_output, model_metadata) 122 | -------------------------------------------------------------------------------- /src/nyc_src/transform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/src/nyc_src/transform/__init__.py -------------------------------------------------------------------------------- /src/nyc_src/transform/transform.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from uuid import uuid4 4 | from datetime import datetime 5 | import os 6 | import pandas as pd 7 | import numpy as np 8 | 9 | from mlops.common.logger import get_logger 10 | 11 | logger = get_logger("nyc_taxi_transform") 12 | 13 | def main(clean_data, transformed_data): 14 | lines = [ 15 | f"Clean data path: {clean_data}", 16 | f"Transformed data output path: {transformed_data}", 17 | ] 18 | 19 | for line in lines: 20 | logger.info(line) 21 | 22 | logger.info("mounted_path files: ") 23 | arr = os.listdir(clean_data) 24 | logger.info(arr) 25 | 26 | df_list = [] 27 | for filename in arr: 28 | 
logger.info("reading file: %s ..." % filename) 29 | with open(os.path.join(clean_data, filename), "r") as handle: 30 | input_df = pd.read_csv((Path(clean_data) / filename)) 31 | df_list.append(input_df) 32 | 33 | # Transform the data 34 | combined_df = df_list[1] 35 | final_df = transform_data(combined_df) 36 | 37 | # Output data 38 | final_df.to_csv((Path(args.transformed_data) / "transformed_data.csv")) 39 | 40 | 41 | # These functions filter out coordinates for locations that are outside the city border. 42 | 43 | # Filter out coordinates for locations that are outside the city border. 44 | # Chain the column filter commands within the filter() function 45 | # and define the minimum and maximum bounds for each field 46 | 47 | 48 | def transform_data(combined_df): 49 | combined_df = combined_df.astype( 50 | { 51 | "pickup_longitude": "float64", 52 | "pickup_latitude": "float64", 53 | "dropoff_longitude": "float64", 54 | "dropoff_latitude": "float64", 55 | } 56 | ) 57 | 58 | latlong_filtered_df = combined_df[ 59 | (combined_df.pickup_longitude <= -73.72) 60 | & (combined_df.pickup_longitude >= -74.09) 61 | & (combined_df.pickup_latitude <= 40.88) 62 | & (combined_df.pickup_latitude >= 40.53) 63 | & (combined_df.dropoff_longitude <= -73.72) 64 | & (combined_df.dropoff_longitude >= -74.72) 65 | & (combined_df.dropoff_latitude <= 40.88) 66 | & (combined_df.dropoff_latitude >= 40.53) 67 | ] 68 | 69 | latlong_filtered_df.reset_index(inplace=True, drop=True) 70 | 71 | # These functions replace undefined values and rename to use meaningful names. 
72 | replaced_stfor_vals_df = latlong_filtered_df.replace( 73 | {"store_forward": "0"}, {"store_forward": "N"} 74 | ).fillna({"store_forward": "N"}) 75 | 76 | replaced_distance_vals_df = replaced_stfor_vals_df.replace( 77 | {"distance": ".00"}, {"distance": 0} 78 | ).fillna({"distance": 0}) 79 | 80 | normalized_df = replaced_distance_vals_df.astype({"distance": "float64"}) 81 | 82 | # These functions transform the renamed data to be used finally for training. 83 | 84 | # Split the pickup and dropoff date further into the day of the week, day of the month, and month values. 85 | # To get the day of the week value, use the derive_column_by_example() function. 86 | # The function takes an array parameter of example objects that define the input data, 87 | # and the preferred output. The function automatically determines your preferred transformation. 88 | # For the pickup and dropoff time columns, split the time into the hour, minute, and second by using 89 | # the split_column_by_example() function with no example parameter. After you generate the new features, 90 | # use the drop_columns() function to delete the original fields as the newly generated features are preferred. 91 | # Rename the rest of the fields to use meaningful descriptions. 
92 | 93 | temp = pd.DatetimeIndex(normalized_df["pickup_datetime"], dtype="datetime64[ns]") 94 | normalized_df["pickup_date"] = temp.date 95 | normalized_df["pickup_weekday"] = temp.dayofweek 96 | normalized_df["pickup_month"] = temp.month 97 | normalized_df["pickup_monthday"] = temp.day 98 | normalized_df["pickup_time"] = temp.time 99 | normalized_df["pickup_hour"] = temp.hour 100 | normalized_df["pickup_minute"] = temp.minute 101 | normalized_df["pickup_second"] = temp.second 102 | 103 | temp = pd.DatetimeIndex(normalized_df["dropoff_datetime"], dtype="datetime64[ns]") 104 | normalized_df["dropoff_date"] = temp.date 105 | normalized_df["dropoff_weekday"] = temp.dayofweek 106 | normalized_df["dropoff_month"] = temp.month 107 | normalized_df["dropoff_monthday"] = temp.day 108 | normalized_df["dropoff_time"] = temp.time 109 | normalized_df["dropoff_hour"] = temp.hour 110 | normalized_df["dropoff_minute"] = temp.minute 111 | normalized_df["dropoff_second"] = temp.second 112 | 113 | del normalized_df["pickup_datetime"] 114 | del normalized_df["dropoff_datetime"] 115 | 116 | normalized_df.reset_index(inplace=True, drop=True) 117 | 118 | logger.info(normalized_df.head) 119 | logger.info(normalized_df.dtypes) 120 | 121 | # Drop the pickup_date, dropoff_date, pickup_time, dropoff_time columns because they're 122 | # no longer needed (granular time features like hour, 123 | # minute and second are more useful for model training). 124 | del normalized_df["pickup_date"] 125 | del normalized_df["dropoff_date"] 126 | del normalized_df["pickup_time"] 127 | del normalized_df["dropoff_time"] 128 | 129 | # Change the store_forward column to binary values 130 | normalized_df["store_forward"] = np.where( 131 | (normalized_df.store_forward == "N"), 0, 1 132 | ) 133 | 134 | # Before you package the dataset, run two final filters on the dataset. 
135 | # To eliminate incorrectly captured data points, 136 | # filter the dataset on records where both the cost and distance variable values are greater than zero. 137 | # This step will significantly improve machine learning model accuracy, 138 | # because data points with a zero cost or distance represent major outliers that throw off prediction accuracy. 139 | 140 | final_df = normalized_df[(normalized_df.distance > 0) & (normalized_df.cost > 0)] 141 | final_df.reset_index(inplace=True, drop=True) 142 | logger.info(final_df.head) 143 | 144 | return final_df 145 | 146 | 147 | if __name__ == "__main__": 148 | parser = argparse.ArgumentParser("transform") 149 | parser.add_argument("--clean_data", type=str, help="Path to prepped data") 150 | parser.add_argument("--transformed_data", type=str, help="Path of output data") 151 | 152 | args = parser.parse_args() 153 | 154 | clean_data = args.clean_data 155 | transformed_data = args.transformed_data 156 | main(clean_data, transformed_data) 157 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/README.md: -------------------------------------------------------------------------------- 1 | # Sample implementation of Model factory and Usecase builder 2 | 3 | ## Context 4 | 5 | This is a sample implementation of the model factory. 6 | 7 | ## Design and Architecture 8 | 9 | - [Design of the Model factory](docs/01-model-factory-design.md) 10 | 11 | ## Adaptation from the accelerator and differences 12 | 13 | ### How this sample implementation is built on top of the accelerator 14 | 15 | - Cloned the accelerator repository into a client specific repository. 16 | - Configured the pipelines in Azure Devops, and made relevant changes to adapt to the client's ways of working. 17 | - Extended the mlops runner to support the client's use-case. 18 | - Inference code for the model was updated related to the specific model. 
19 | 20 | ### Differences in the sample implementation from the accelerator 21 | 22 | - There are few changes to the folder structure in the sample implementation. The accelerator will have functional blocks (ml-ops, model, src) as top level folders. Where as in the sample implementation, we have the models at the top and the functional blocks will be within the specific models. 23 | - In the accelerator, we have the devops pipeline to setup and trigger the `mlops-pipeline` in the common folder. While implementing, we had a scenario to pass in different variables for different models' ml-ops-pipeline, hence this is moved to the model specific folder. 24 | 25 | ## How to use this sample 26 | 27 | - [Instructions to use this sample](docs/02-instructions.md) -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/docs/01-model-factory-design.md: -------------------------------------------------------------------------------- 1 | # Model Factory Design 2 | 3 | This document provides an overview and detailed specifications for the design and implementation of a model factory. 4 | 5 | ## Introduction 6 | 7 | The model factory is a system that automates the end-to-end process of developing, training, deploying, and managing machine learning models. This document outlines the architecture, components, and workflow of the model factory, along with the necessary requirements and considerations. 8 | 9 | ## Features of Model Factory 10 | 11 | - Supports generation of multiple ML Models. 12 | - MLOps pipeline for Data preparation, transformation, Model Training, evaluation, scoring and registration. 13 | - Each ML Model is packaged in an independent Docker Image. 14 | - Model verification before storing the Docker image. 15 | - All Docker images are stored in Azure Container Registry. 16 | - Builds and deploys Smoke Test module on Edge device. 
17 | - Based on Azure ML SDK v2 1.4 18 | 19 | ## Architecture 20 | 21 | It consists of following components: 22 | 23 | 1. **Model Source** 24 | Model source contains the code written by data science team to execute the different stages of model development lifecycle. It is part of `src` folder in the model directory. 25 | 1. **Model Packaging** 26 | This component is used to package ML model into docker container images. It is part of `model` folder in model directory. 27 | 1. **MLOps Pipelines** 28 | It is a placeholder for different ml components used to create MLOps pipelines. It also contains ml ops pipelines code which can be triggered from DevOps pipelines. It is part of `mlops` folder in model directory. 29 | 1. **DevOps Pipelines** 30 | It contains Azure DevOps related pipelines which help in converting the ml code into model container images and push them to the container registry. It is part of `devops` folder in model directory. These pipelines perform various tasks like: 31 | a. Validation of python code(linting, and unit testing) 32 | b. Execution of MLOps pipelines and registering model on AML workspace. 33 | c. Creation of model docker container images. 34 | d. Pushing those docker container images to ACR. 35 | e. Performing Smoke tests on those docker container images. 36 | 1. **Model Repository** 37 | Model Repository is part of AML workspace which store all the models generated by MLOps pipelines. 38 | 1. **Docker Container Repository** 39 | It stores all the model docker container images. 40 | 1. **Notebooks** 41 | These are Jupyter notebooks used by data science team to work and test logic of model generation. These are contained in `notebooks` folder in model_factory folder. 42 | 1. **Common Code** 43 | These are common code which consists of common DevOps template pipelines and also common code for MLOps. It is contained in `common` folder in model_factory directory. 44 | 1. 
**Unit Tests** 45 | These are specific to each model and are contained in `tests` folder in model directory. 46 | 47 | ## Development workflow of Model Factory 48 | 49 | ![development flow](/docs/assets/images/model_factory_design.jpg) 50 | 51 | The above diagram depicts the development workflow of model factory. At a high level following steps are followed in the workflow: 52 | 53 | 1. Data Science team works on development of model on local machine using VSCode remote extension or on AML workspace. This development is generally done in Jupyter notebook. 54 | 1. Once the model is ready and is tested, code from notebooks is brought into `src` folder for that model. 55 | 1. Code is then pushed to dev environment, where MLOps and DevOps pipelines are built and triggered which helps in automating the generation of ML models and pushing ML model docker containers images the dev ACR. 56 | 1. Data Science team validates the model metrics after which the code is push to prod env where pipelines execute basic tests. 57 | 1. Once all steps are successful there is final Gated approval check which allows pushing of these images into prod ACR. 58 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/docs/02-instructions.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This document contains the instructions to use this sample. 4 | 5 | ## Prerequisite 6 | 7 | Following azure resources are required to run this sample: 8 | 9 | 1. Azure AML Workspace: 10 | AML workspace also create following resources: 11 | 1. Application Insights 12 | 1. Azure Keyvault 13 | 1. Azure Blob Storage 14 | 1. Azure Container Registry 15 | 1. Azure VM: It will be used for smoke test. 16 | 1. Azure VM: It will be used for e2e test. 17 | 1. Azure Devops 18 | 1. Service Principal 19 | 20 | ## Steps 21 | 22 | 1. Provision Azure AML workspace. 23 | 1. 
Provision Azure Container registry. 24 | 1. Create Service Connection in Azure Devops providing access to resource group. 25 | 1. Create two variable groups in Azure Devops 26 | 1. mlops_platform_dev_vg 27 | 1. mlops_platform_prod_vg 28 | Add following variables to the both these variable group. Here different ACR's can be used for prod and dev env. 29 | 1. ACR_URL : Azure container registry url 30 | 1. ACR_USERNAME : Azure container registry username 31 | 1. ACR_PASSWORD: Azure container registry password 32 | 1. AZURE_RM_SVC_CONNECTION: Service Connection name 33 | 1. KEYVAULT_NAME: Keyvault name 34 | 1. Create Service Principal 35 | `az ad sp create-for-rbac --name --role owner --scopes /subscriptions//resourceGroups/` 36 | 1. Give Service Principal access to AML workspace and Keyvault 37 | 1. Give Service Connection access to AML worksapce and keyvault. 38 | 1. Add following secrets to Azure Keyvault: 39 | 1. aml-service-principal-id: Service Principal created in step 7 40 | 1. aml-service-principal-secret: Service Principal secret 41 | 1. tenant-id 42 | 1. applicationinsights-connection-string: Application insights connection string 43 | 44 | Following variables are related to ACR. 45 | 1. registry-uri: Dev ACR url 46 | 1. registry-password : Dev ACR password 47 | 1. registry-username: Dev ACR username 48 | Dev ACR 49 | 1. registry-uri-dev: Dev ACR url 50 | 1. registry-username-dev: Dev ACR username 51 | 1. registry-password-dev: Dev ACR password 52 | Prod ACR 53 | 1. registry-uri-prod: Prod ACR url 54 | 1. registry-username-prod: Prod ACR username 55 | 1. registry-password-prod: Prod ACR password 56 | 1. Update model_config(`model_factory\fridge_obj_det\config\model_config.json`) with required values. 57 | 1. Create Azure Pipelines using following yaml files. 58 | 1. Model factory Pipelines: 59 | 1. fridge_obj_det_dev_pipeline: `model_factory\fridge_obj_det\devops\pipelines\fridge_obj_det_dev_pipeline.yml` 60 | 1. 
fridge_obj_det_main_pipeline: 61 | `model_factory\fridge_obj_det\devops\pipelines\fridge_obj_det_main_pipeline.yml` 62 | 63 | 1. Execution of pipelines 64 | 1. Model Factory Pipelines: 65 | - Model factory dev pipeline executes AML pipelines and creates model container docker image and pushes it to ACR. 66 | - Model factory main pipeline executes AML pipeline, creates docker image, and pushes image to ACR. 67 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/docs/assets/images/model_factory_design.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/telco_case_study_implementation/fridge_object_detection/docs/assets/images/model_factory_design.jpg -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/__init__.py: -------------------------------------------------------------------------------- 1 | """TODO Doc string.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/__init__.py: -------------------------------------------------------------------------------- 1 | """Common module for model factory.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/configure_azureml_agent.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: model_type 3 | displayName: "type of model to execute" 4 | 5 | steps: 6 | - task: UsePythonVersion@0 7 | displayName: "Use Python 3.8" 8 | inputs: 9 | versionSpec: "3.8" 10 | 11 | - script: | 12 | python -m venv 
env 13 | source env/bin/activate 14 | displayName: "Create Virtual env" 15 | 16 | - task: AzureCLI@2 17 | displayName: Install Job Requirements 18 | inputs: 19 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 20 | scriptType: bash 21 | scriptLocation: inlineScript 22 | inlineScript: | 23 | set -e # fail on error 24 | source env/bin/activate 25 | python -m pip install --upgrade pip 26 | pip install -r $(System.DefaultWorkingDirectory)/model_factory/${{parameters.model_type}}/devops/pipelines/requirements/execute_job_requirements.txt 27 | az version 28 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/execute_mlops_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: script_parameter 3 | type: string 4 | 5 | steps: 6 | - task: AzureKeyVault@2 7 | continueOnError: false 8 | inputs: 9 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 10 | KeyVaultName: $(KEYVAULT_NAME) 11 | SecretsFilter: "*" 12 | RunAsPreJob: false 13 | - task: AzureCLI@2 14 | name: Execute_ml_Job_Pipeline 15 | displayName: Execute Azure ML pipeline job 16 | continueOnError: false 17 | inputs: 18 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 19 | scriptType: bash 20 | workingDirectory: $(System.DefaultWorkingDirectory)/model_factory 21 | scriptLocation: inlineScript 22 | inlineScript: | 23 | source ../env/bin/activate 24 | export AZURE_CLIENT_ID=$(aml-service-principal-id) 25 | export AZURE_TENANT_ID=$(tenant-id) 26 | export AZURE_CLIENT_SECRET=$(aml-service-principal-secret) 27 | which python 28 | ${{parameters.script_parameter}} 29 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/experiment_variables.yml: 
-------------------------------------------------------------------------------- 1 | variables: 2 | - name: ML_MODEL_CONFIG_NAME 3 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ML_MODEL_CONFIG_NAME'] ] 4 | - name: KEYVAULT_NAME 5 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.KEYVAULT_NAME'] ] 6 | - name: ML_SCOPE_SVC_CONNECTION 7 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ML_SCOPE_SVC_CONNECTION'] ] 8 | - name: HOST_PORT_NUMBER 9 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.HOST_PORT_NUMBER'] ] 10 | - name: EXPERIMENT_BASE_NAME 11 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.EXPERIMENT_BASE_NAME'] ] 12 | - name: ENVIRONMENT_NAME 13 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ENVIRONMENT_NAME'] ] 14 | - name: ENV_BASE_IMAGE_NAME 15 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.ENV_BASE_IMAGE_NAME'] ] 16 | - name: DISPLAY_BASE_NAME 17 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.DISPLAY_BASE_NAME'] ] 18 | - name: CONDA_PATH 19 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CONDA_PATH'] ] 20 | - name: CLUSTER_SIZE 21 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_SIZE'] ] 22 | - name: CLUSTER_REGION 23 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_REGION'] ] 24 | - name: CLUSTER_NAME 25 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.CLUSTER_NAME'] ] 26 | - name: AZURE_RM_SVC_CONNECTION 27 
| value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.AZURE_RM_SVC_CONNECTION'] ] 28 | - name: MODEL_BASE_NAME 29 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.MODEL_BASE_NAME'] ] 30 | - name: REGISTRY_NAME 31 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.REGISTRY_NAME'] ] 32 | - name: RESOURCE_GROUP_NAME 33 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.RESOURCE_GROUP_NAME'] ] 34 | - name: SCORE_FILE_NAME 35 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.SCORE_FILE_NAME'] ] 36 | - name: WORKSPACE_NAME 37 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.WORKSPACE_NAME'] ] 38 | - name: EXPERIMENT_NAME 39 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.EXPERIMENT_NAME'] ] 40 | - name: DISPLAY_NAME 41 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.DISPLAY_NAME'] ] 42 | - name: MODEL_NAME 43 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.MODEL_NAME'] ] 44 | - name: HEALTHCHECK_TIMEOUT 45 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.HEALTHCHECK_TIMEOUT'] ] 46 | - name: AML_ARTIFACT_PATH 47 | value: $[ stageDependencies.variable_generation.load_config_variables.outputs['loading_model_config.AML_ARTIFACT_PATH'] ] -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/get_connection_details.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - task: AzureCLI@2 3 | name: 
retrieveAzureServiceConnection 4 | displayName: Retrieve Azure Service Connection 5 | inputs: 6 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 7 | scriptLocation: inlineScript 8 | scriptType: bash 9 | inlineScript: | 10 | export subscriptionId=$(az account show --query id -o tsv) 11 | echo "##vso[task.setvariable variable=SUBSCRIPTION_ID]$subscriptionId" 12 | echo "##vso[task.setvariable variable=TENANT_ID]$tenantId" 13 | addSpnToEnvironment: true -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/image_generation_template.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | default: "dev" 5 | - name: model_type 6 | displayName: "type of model to execute" 7 | - name: model_name 8 | displayName: "model name" 9 | default: $(MODEL_NAME) 10 | - name: dependencies 11 | default: "" 12 | 13 | jobs: 14 | - job: build_docker_image 15 | timeoutInMinutes: 0 16 | dependsOn: ${{ parameters.dependencies }} 17 | steps: 18 | - task: UsePythonVersion@0 19 | displayName: "install python 3.8" 20 | continueOnError: false 21 | inputs: 22 | versionSpec: "3.8" 23 | 24 | - task: AzureKeyVault@2 25 | continueOnError: false 26 | inputs: 27 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 28 | KeyVaultName: $(KEYVAULT_NAME) 29 | SecretsFilter: "*" 30 | RunAsPreJob: false 31 | 32 | - script: | 33 | python -m venv env 34 | displayName: "Create Virtual env" 35 | 36 | - task: AzureCLI@2 37 | displayName: "setup the build server" 38 | continueOnError: false 39 | inputs: 40 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 41 | scriptType: bash 42 | workingDirectory: $(system.DefaultWorkingDirectory) 43 | scriptLocation: inlineScript 44 | inlineScript: | 45 | source env/bin/activate 46 | python -m pip install --upgrade pip 47 | pip install -r 
model_factory/${{parameters.model_type}}/devops/pipelines/requirements/execute_job_requirements.txt 48 | az extension add -n ml -y 49 | az upgrade --yes 50 | az config set extension.use_dynamic_install=yes_without_prompt 51 | 52 | - task: AzureCLI@2 53 | displayName: Download Model artifacts from AzureML Model Registry 54 | continueOnError: false 55 | inputs: 56 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 57 | scriptType: bash 58 | scriptLocation: inlineScript 59 | workingDirectory: $(system.DefaultWorkingDirectory) 60 | inlineScript: | 61 | set -e # fail on error 62 | source env/bin/activate 63 | az ml model download --name ${{parameters.model_name}} --version $(az ml model list --name ${{parameters.model_name}} --resource-group $(RESOURCE_GROUP_NAME) --workspace-name $(WORKSPACE_NAME) --query "[0].version" --output tsv) --download-path model_factory/${{parameters.model_type}}/model/model_download -g $(RESOURCE_GROUP_NAME) -w $(WORKSPACE_NAME) 64 | tar -xvzf model_factory/${{parameters.model_type}}/model/model_download/${{parameters.model_name}}/model_artifacts.tar.gz -C model_factory/${{parameters.model_type}}/model/model_artifacts 65 | 66 | - task: AzureCLI@2 67 | displayName: Build Docker Image with model artifacts 68 | continueOnError: false 69 | inputs: 70 | azureSubscription: $(AZURE_RM_SVC_CONNECTION) 71 | scriptType: bash 72 | workingDirectory: $(system.DefaultWorkingDirectory)/model_factory/${{parameters.model_type}}/model 73 | scriptLocation: inlineScript 74 | inlineScript: | 75 | set -e # fail on error 76 | # docker buildx use mybuilder 77 | # docker buildx inspect --bootstrap 78 | docker login "$(ACR_URL)" -u "$(ACR_USERNAME)" -p "$(ACR_PASSWORD)" 79 | docker buildx build --platform linux/amd64 -t $(ACR_URL)/$(MODEL_NAME):$(Build.BuildNumber) . --push 80 | # docker buildx build --platform linux/amd64 -t $(ACR_URL)/$(MODEL_NAME):$(Build.BuildNumber) . 
--push 81 | docker images 82 | echo "##vso[task.setvariable variable=ML_MODEL_DOCKER_IMAGE_NAME;isOutput=true;]$(MODEL_NAME)" 83 | echo "##vso[task.setvariable variable=ML_MODEL_DOCKER_IMAGE_VERSION;isOutput=true;]$(Build.BuildNumber)" 84 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/platform_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | default: "dev" 5 | - name: model_type 6 | displayName: "type of model to execute" 7 | - name: runMlops 8 | displayName: "Run MLOps" 9 | default: true 10 | 11 | stages: 12 | - stage: execute_training_job 13 | displayName: execute_training_job 14 | dependsOn: 15 | - variable_generation 16 | - build_validation 17 | variables: 18 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 19 | jobs: 20 | - job: Execute_ml_Job_Pipeline 21 | condition: eq('${{ parameters.runMlops }}', true) 22 | timeoutInMinutes: 360 23 | steps: 24 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/get_connection_details.yml 25 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/configure_azureml_agent.yml 26 | parameters: 27 | model_type: ${{parameters.model_type}} 28 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/${{parameters.model_type}}/devops/pipelines/${{parameters.model_type}}_mlops_pipeline.yml 29 | parameters: 30 | model_type: ${{parameters.model_type}} 31 | model_name: $(MODEL_NAME) 32 | 33 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/image_generation_template.yml 34 | parameters: 35 | exec_environment: 
${{parameters.exec_environment}} 36 | model_type: ${{parameters.model_type}} 37 | dependencies: "Execute_ml_Job_Pipeline" 38 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/platform_main_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | default: "dev" 5 | - name: model_type 6 | displayName: "type of model to execute" 7 | - name: TARGET_CONDITION 8 | displayName: "Target Condition for the deployment" 9 | - name: runMlops 10 | displayName: "Run MLOps" 11 | default: true 12 | 13 | stages: 14 | - stage: execute_training_job 15 | displayName: execute_training_job 16 | dependsOn: 17 | - variable_generation 18 | - build_validation 19 | variables: 20 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 21 | - group: mlops_platform_dev_vg 22 | jobs: 23 | - job: Execute_ml_Job_Pipeline 24 | condition: eq('${{ parameters.runMlops }}', true) 25 | timeoutInMinutes: 360 26 | steps: 27 | - task: UsePythonVersion@0 28 | displayName: "Use Python 3.8" 29 | inputs: 30 | versionSpec: "3.8" 31 | addToPath: true 32 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/get_connection_details.yml 33 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/configure_azureml_agent.yml 34 | parameters: 35 | model_type: ${{parameters.model_type}} 36 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/${{parameters.model_type}}/devops/pipelines/${{parameters.model_type}}_mlops_pipeline.yml 37 | parameters: 38 | model_type: ${{parameters.model_type}} 39 | model_name: "${{parameters.model_type}}_prod_master" 40 | 41 | - template: 
${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/image_generation_template.yml 42 | parameters: 43 | exec_environment: ${{ parameters.exec_environment }} 44 | model_type: ${{ parameters.model_type }} 45 | model_name: "${{parameters.model_type}}_prod_master" 46 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/common/devops/templates/variables_template.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | - name: model_type 5 | displayName: "type of model to execute" 6 | 7 | stages: 8 | - stage: variable_generation 9 | jobs: 10 | - job: load_config_variables 11 | steps: 12 | - powershell: | 13 | $json = Get-Content -Raw -Path '$(System.DefaultWorkingDirectory)/model_factory/${{parameters.model_type}}/config/model_config.json' | ConvertFrom-Json 14 | $firstElement = $json.models | Where-Object {($_.ML_MODEL_CONFIG_NAME -eq "${{ parameters.model_type }}") -and ($_.ENV_NAME -eq "${{ parameters.exec_environment }}")} | Select-Object -First 1 15 | 16 | Write-Output $firstElement.KEYVAULT_NAME 17 | 18 | foreach ($property in $firstElement.PSObject.Properties) { 19 | $pname = $property.Name 20 | $pvalue = $property.Value 21 | Write-Output "##vso[task.setvariable variable=$pname;isoutput=true]$pvalue" 22 | } 23 | 24 | $EXPERIMENT_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.EXPERIMENT_BASE_NAME)" + "_" + "${{parameters.exec_environment}}" + "_" + "$(Build.SourceBranchName)" 25 | Write-Output "##vso[task.setvariable variable=EXPERIMENT_NAME;isoutput=true]$EXPERIMENT_NAME" 26 | 27 | $DISPLAY_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.DISPLAY_BASE_NAME)" + "_" + "${{parameters.exec_environment}}" + "_" + "$(Build.BuildID)" 28 | Write-Output "##vso[task.setvariable 
variable=DISPLAY_NAME;isoutput=true]$DISPLAY_NAME" 29 | Write-Output $DISPLAY_NAME 30 | $MODEL_NAME = "${{ parameters.model_type }}" + "_" + "$($firstElement.MODEL_BASE_NAME)" + "_" + "${{parameters.exec_environment}}" + "_" + "$(Build.SourceBranchName)" 31 | Write-Output "##vso[task.setvariable variable=MODEL_NAME;isoutput=true]$MODEL_NAME" 32 | name: loading_model_config 33 | 34 | - job: validate_assign_variables 35 | dependsOn: load_config_variables 36 | variables: 37 | - name: ML_MODEL_CONFIG_NAME 38 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ML_MODEL_CONFIG_NAME'] ] 39 | - name: KEYVAULT_NAME 40 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.KEYVAULT_NAME'] ] 41 | - name: ML_SCOPE_SVC_CONNECTION 42 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ML_SCOPE_SVC_CONNECTION'] ] 43 | - name: HOST_PORT_NUMBER 44 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.HOST_PORT_NUMBER'] ] 45 | - name: EXPERIMENT_BASE_NAME 46 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.EXPERIMENT_BASE_NAME'] ] 47 | - name: ENVIRONMENT_NAME 48 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ENVIRONMENT_NAME'] ] 49 | - name: ENV_BASE_IMAGE_NAME 50 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.ENV_BASE_IMAGE_NAME'] ] 51 | - name: DISPLAY_BASE_NAME 52 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.DISPLAY_BASE_NAME'] ] 53 | - name: CONDA_PATH 54 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CONDA_PATH'] ] 55 | - name: CLUSTER_SIZE 56 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_SIZE'] ] 57 | - name: CLUSTER_REGION 58 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_REGION'] ] 59 | - name: CLUSTER_NAME 60 | value: $[ 
dependencies.load_config_variables.outputs['loading_model_config.CLUSTER_NAME'] ] 61 | - name: AZURE_RM_SVC_CONNECTION 62 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.AZURE_RM_SVC_CONNECTION'] ] 63 | - name: MODEL_BASE_NAME 64 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.MODEL_BASE_NAME'] ] 65 | - name: REGISTRY_NAME 66 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.REGISTRY_NAME'] ] 67 | - name: RESOURCE_GROUP_NAME 68 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.RESOURCE_GROUP_NAME'] ] 69 | - name: SCORE_FILE_NAME 70 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.SCORE_FILE_NAME'] ] 71 | - name: WORKSPACE_NAME 72 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.WORKSPACE_NAME'] ] 73 | - name: EXPERIMENT_NAME 74 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.EXPERIMENT_NAME'] ] 75 | - name: DISPLAY_NAME 76 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.DISPLAY_NAME'] ] 77 | - name: MODEL_NAME 78 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.MODEL_NAME'] ] 79 | - name: HEALTHCHECK_TIMEOUT 80 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.HEALTHCHECK_TIMEOUT'] ] 81 | - name: AML_ARTIFACT_PATH 82 | value: $[ dependencies.load_config_variables.outputs['loading_model_config.AML_ARTIFACT_PATH'] ] 83 | steps: 84 | - script: | 85 | if [ -z "$(ML_MODEL_CONFIG_NAME)" ] 86 | then 87 | echo "variables are not available. Check parameter values or config json file for valid values.." 88 | exit 1 89 | else 90 | echo "variables were loaded from config file.." 
"""Reusable logger and AML client factory for model_factory."""
import logging
import os
import sys


def get_logger(name: str = "dataops", level: int = logging.DEBUG) -> logging.Logger:
    """Return a named logger that writes timestamped records to stdout.

    Args:
        name (str, optional): Logger name. Defaults to "dataops".
        level (int, optional): Log level. Defaults to logging.DEBUG.

    Returns:
        logging.Logger: named logger.
    """
    logger = logging.getLogger(name)
    # Configure only once per named logger. Checking this logger's OWN
    # handler list (instead of hasHandlers(), which also walks ancestor
    # loggers) guarantees the stream handler and level are applied even
    # when the root logger is already configured (e.g. under pytest).
    if logger.handlers:
        return logger

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    logger.setLevel(level)
    logger.addHandler(handler)
    return logger


def get_aml_client(
    client_id: str,
    client_secret: str,
    tenant_id: str,
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
):
    """Create an MLClient authenticated via an EnvironmentCredential.

    The service-principal values are exported into the process
    environment because EnvironmentCredential reads its credentials
    from AZURE_CLIENT_ID / AZURE_CLIENT_SECRET / AZURE_TENANT_ID.

    Args:
        client_id (str): service principal client id
        client_secret (str): service principal secret
        tenant_id (str): Azure AD tenant id
        subscription_id (str): Azure subscription id
        resource_group_name (str): resource group holding the workspace
        workspace_name (str): AML workspace name

    Returns:
        MLClient: client scoped to the given workspace.

    Raises:
        Exception: re-raised after logging if client creation fails.
    """
    # Imported lazily so this module can be imported (e.g. for get_logger)
    # without the azure-* packages installed.
    from azure.identity import EnvironmentCredential
    from azure.ai.ml import MLClient

    try:
        os.environ["AZURE_CLIENT_ID"] = client_id
        os.environ["AZURE_CLIENT_SECRET"] = client_secret
        os.environ["AZURE_TENANT_ID"] = tenant_id

        credential = EnvironmentCredential()
        # MLClient raises on failure rather than returning None, so no
        # post-construction None check is needed.
        return MLClient(
            credential,
            subscription_id=subscription_id,
            resource_group_name=resource_group_name,
            workspace_name=workspace_name,
        )
    except Exception as ex:
        print(f"Exception while creating MLClient: {ex}")
        raise
"""Return (and lazily create) an AML compute cluster."""
import argparse
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import AmlCompute
from azure.core.exceptions import ResourceNotFoundError
from common.logging.logger import get_logger

logger = get_logger("common_mlops")


def get_compute(
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
    cluster_name: str,
    cluster_size: str,
    cluster_region: str,
    min_instances: int,
    max_instances: int,
    idle_time_before_scale_down: int,
):
    """Return an existing AML compute cluster, creating it if absent.

    Args:
        subscription_id (str): subscription id
        resource_group_name (str): resource group name
        workspace_name (str): workspace name
        cluster_name (str): cluster name
        cluster_size (str): cluster size (VM SKU)
        cluster_region (str): cluster region
        min_instances (int): min instances
        max_instances (int): max instances
        idle_time_before_scale_down (int): idle seconds before scale down

    Returns:
        _type_: AML Compute

    Raises:
        Exception: re-raised after logging when the cluster can neither
            be fetched nor created.
    """
    compute_object = None
    try:
        client = MLClient(
            DefaultAzureCredential(),
            subscription_id=subscription_id,
            resource_group_name=resource_group_name,
            workspace_name=workspace_name,
        )
        compute_object = client.compute.get(cluster_name)
        logger.info(f"Found existing compute target {cluster_name}, so using it.")
    except ResourceNotFoundError:
        logger.info(f"{cluster_name} is not found! Trying to create a new one.")
        # Build the desired cluster specification, then submit it;
        # .result() blocks until provisioning completes.
        compute_spec = AmlCompute(
            name=cluster_name,
            type="amlcompute",
            size=cluster_size,
            location=cluster_region,
            min_instances=min_instances,
            max_instances=max_instances,
            idle_time_before_scale_down=idle_time_before_scale_down,
        )
        compute_object = client.compute.begin_create_or_update(
            compute_spec
        ).result()
        logger.info(f"A new cluster {cluster_name} has been created.")
    except Exception:
        # logger.exception already appends the traceback; plain string,
        # no f-string placeholders needed.
        logger.exception("Not able to access compute")
        raise
    return compute_object


def main():
    """Parse CLI arguments and fetch/create the AML compute cluster."""
    parser = argparse.ArgumentParser("get_compute")
    parser.add_argument("--subscription_id", type=str, help="Azure subscription id")
    parser.add_argument(
        "--resource_group_name", type=str, help="Azure Machine learning resource group"
    )
    parser.add_argument(
        "--workspace_name", type=str, help="Azure Machine learning Workspace name"
    )
    parser.add_argument(
        "--cluster_name", type=str, help="Azure Machine learning cluster name"
    )
    parser.add_argument(
        "--cluster_size", type=str, help="Azure Machine learning cluster size"
    )
    parser.add_argument(
        "--cluster_region", type=str, help="Azure Machine learning cluster region"
    )
    parser.add_argument("--min_instances", type=int, default=0)
    parser.add_argument("--max_instances", type=int, default=4)
    parser.add_argument("--idle_time_before_scale_down", type=int, default=120)

    args = parser.parse_args()
    get_compute(
        args.subscription_id,
        args.resource_group_name,
        args.workspace_name,
        args.cluster_name,
        args.cluster_size,
        args.cluster_region,
        args.min_instances,
        args.max_instances,
        args.idle_time_before_scale_down,
    )


if __name__ == "__main__":
    main()
"""Create or update an AML workspace environment."""
import argparse
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Environment
from common.logging.logger import get_logger

logger = get_logger("common_mlops")


def get_environment(
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
    env_base_image_name: str,
    conda_path: str,
    environment_name: str,
    description: str,
):
    """Create or update a conda-based AML environment.

    Args:
        subscription_id (str): subscription id
        resource_group_name (str): resource group name
        workspace_name (str): workspace name
        env_base_image_name (str): base docker image for the environment
        conda_path (str): path to the conda requirements file
        environment_name (str): environment name
        description (str): environment description

    Returns:
        _type_: workspace environment

    Raises:
        Exception: re-raised after logging when create/update fails.
    """
    try:
        logger.info(f"Checking {environment_name} environment.")
        client = MLClient(
            DefaultAzureCredential(),
            subscription_id=subscription_id,
            resource_group_name=resource_group_name,
            workspace_name=workspace_name,
        )
        env_docker_conda = Environment(
            image=env_base_image_name,
            conda_file=conda_path,
            name=environment_name,
            description=description,
        )
        # create_or_update is idempotent: it registers a new version when
        # the definition changed, otherwise returns the existing one.
        environment = client.environments.create_or_update(env_docker_conda)
        logger.info(f"Environment {environment_name} has been created or updated.")
        return environment

    except Exception:
        # logger.exception captures the traceback; no f-string needed.
        logger.exception("Not able to get environment")
        raise


def main():
    """Parse CLI arguments and create/update the AML environment."""
    parser = argparse.ArgumentParser("prepare_environment")
    parser.add_argument("--subscription_id", type=str, help="Azure subscription id")
    parser.add_argument(
        "--resource_group_name", type=str, help="Azure Machine learning resource group"
    )
    parser.add_argument(
        "--workspace_name", type=str, help="Azure Machine learning Workspace name"
    )
    parser.add_argument(
        "--env_base_image_name", type=str, help="Environment custom base image name"
    )
    parser.add_argument(
        "--conda_path", type=str, help="path to conda requirements file"
    )
    parser.add_argument(
        "--environment_name", type=str, help="Azure Machine learning environment name"
    )
    parser.add_argument(
        "--description", type=str, default="Environment created using Conda."
    )
    args = parser.parse_args()

    get_environment(
        args.subscription_id,
        args.resource_group_name,
        args.workspace_name,
        args.env_base_image_name,
        args.conda_path,
        args.environment_name,
        args.description,
    )


if __name__ == "__main__":
    main()
SHELL:=/bin/bash

# Location of the development virtual environment.
VENV:=/tmp/fridge_env

# NOTE: make runs each recipe line in a separate shell, so sourcing
# `activate` on its own line has no effect on the lines that follow —
# the original recipe installed packages into the system interpreter.
# Calling the venv's own executables directly avoids that pitfall.
setup:
	python -m venv $(VENV)
	$(VENV)/bin/python -m pip install --upgrade pip
	$(VENV)/bin/python -m pip install -r ./devops/pipelines/requirements/build_validation_requirements.txt

# Lint the project with the flake8 installed inside the venv.
lint: setup
	$(VENV)/bin/flake8 .
-------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/__init__.py: -------------------------------------------------------------------------------- 1 | """The model factory for fridge object detection use case.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/config/model_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "models": [ 3 | { 4 | "ML_MODEL_CONFIG_NAME": "fridge_obj_det", 5 | "ENV_NAME": "dev", 6 | "AZURE_RM_SVC_CONNECTION": "mfdemosc", 7 | "CLUSTER_NAME": "dev-pipeline", 8 | "CLUSTER_REGION": "eastus", 9 | "CLUSTER_SIZE": "STANDARD_DS3_V2", 10 | "CONDA_PATH": "fridge_obj_det/mlops/environment/conda.yml", 11 | "DISPLAY_BASE_NAME": "mlops", 12 | "ENV_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04", 13 | "ENVIRONMENT_NAME": "automl", 14 | "EXPERIMENT_BASE_NAME": "automl-fridge-objects-detection-experiment", 15 | "HOST_PORT_NUMBER": "8081", 16 | "KEYVAULT_NAME": "mfdemoaml7626959246", 17 | "ML_SCOPE_SVC_CONNECTION": "mlops_scoped", 18 | "MODEL_BASE_NAME": "fasterrcnn_resnet18_fpn", 19 | "REGISTRY_NAME": "mfdemoacr", 20 | "RESOURCE_GROUP_NAME": "mfdemorg", 21 | "SCORE_FILE_NAME": "score.py", 22 | "WORKSPACE_NAME": "mfdemoaml", 23 | "AML_ARTIFACT_PATH": "INPUT_onnx_model_artifacts_folder/train_artifacts" 24 | }, 25 | { 26 | "ML_MODEL_CONFIG_NAME": "fridge_obj_det", 27 | "ENV_NAME": "prod", 28 | "AZURE_RM_SVC_CONNECTION": "mfdemosc", 29 | "CLUSTER_NAME": "dev-pipeline", 30 | "CLUSTER_REGION": "eastus", 31 | "CLUSTER_SIZE": "STANDARD_DS3_V2", 32 | "CONDA_PATH": "fridge_obj_det/mlops/environment/conda.yml", 33 | "DISPLAY_BASE_NAME": "mlops", 34 | "ENV_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04", 35 | "ENVIRONMENT_NAME": 
"automl", 36 | "EXPERIMENT_BASE_NAME": "automl-fridge-objects-detection-experiment", 37 | "HOST_PORT_NUMBER": "8081", 38 | "KEYVAULT_NAME": "mfdemoaml7626959246", 39 | "ML_SCOPE_SVC_CONNECTION": "mlops_scoped", 40 | "MODEL_BASE_NAME": "fasterrcnn_resnet18_fpn", 41 | "REGISTRY_NAME": "mfdemoacr", 42 | "RESOURCE_GROUP_NAME": "mfdemorg", 43 | "SCORE_FILE_NAME": "score.py", 44 | "WORKSPACE_NAME": "mfdemoaml", 45 | "AML_ARTIFACT_PATH": "INPUT_onnx_model_artifacts_folder/train_artifacts" 46 | } 47 | ] 48 | } 49 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/build_validation_pipeline.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: model_type 3 | displayName: "type of model to execute" 4 | 5 | jobs: 6 | - job: Build_Validation_Pipeline 7 | workspace: 8 | clean: outputs | resources | all 9 | steps: 10 | - task: UsePythonVersion@0 11 | displayName: 'Use Python 3.8' 12 | inputs: 13 | versionSpec: '3.8' 14 | addToPath: true 15 | 16 | - script: | 17 | python -m venv env 18 | source env/bin/activate 19 | python -m pip install --upgrade pip 20 | python -m pip install -r model_factory/${{parameters.model_type}}/devops/pipelines/requirements/build_validation_requirements.txt 21 | displayName: "Load Python Dependencies" 22 | 23 | - script: | 24 | source env/bin/activate 25 | flake8 ./model_factory/${{parameters.model_type}} 26 | displayName: "Lint with flake8" 27 | 28 | - script: | 29 | source env/bin/activate 30 | cd model_factory 31 | export PYTHONPATH=. 32 | pytest ./${{parameters.model_type}} --ignore=sandbox/ --junitxml=junit/test-results.xml --cov=. 
--cov-report=xml 33 | displayName: 'Run Unit Tests' 34 | condition: succeededOrFailed() 35 | 36 | - task: PublishTestResults@2 37 | condition: succeededOrFailed() 38 | inputs: 39 | testResultsFiles: '**/test-*.xml' 40 | testRunTitle: 'Publish Test Results for Python $(python.version)' 41 | 42 | - task: PublishCodeCoverageResults@1 43 | inputs: 44 | codeCoverageTool: Cobertura 45 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/fridge_obj_det_dev_pipeline.yml: -------------------------------------------------------------------------------- 1 | # pr: 2 | # branches: 3 | # include: 4 | # # - feature/{feature-branch-name} 5 | # # - add feature branches that will build more features on top of frdige-obj-detection 6 | # paths: 7 | # include: 8 | # - model_factory/fridge_obj_det/* 9 | # - model_factory/common/devops/* 10 | 11 | # trigger: 12 | # branches: 13 | # include: 14 | # # - feature/{feature-branch-name} 15 | # # - add feature branches that will build more features on top of frdige-obj-detection 16 | # paths: 17 | # include: 18 | # - model_factory/fridge_obj_det/* 19 | # - model_factory/common/devops/* 20 | 21 | # Replace this section with the above changes, when there is a new feature branch 22 | # that will build on top of fridge-obj-detection 23 | pr: none 24 | trigger: none 25 | 26 | parameters: 27 | - name: exec_environment 28 | displayName: "Execution Environment" 29 | default: "dev" 30 | - name: model_type 31 | displayName: "type of model to execute" 32 | default: "fridge_obj_det" 33 | 34 | variables: 35 | - group: mlops_platform_${{parameters.exec_environment}}_vg 36 | - name: PIPELINE_TYPE 37 | value: ${{parameters.model_type}} 38 | 39 | stages: 40 | - template: 
${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/variables_template.yml 41 | parameters: 42 | exec_environment: ${{parameters.exec_environment}} 43 | model_type: ${{parameters.model_type}} 44 | 45 | - stage: build_validation 46 | displayName: build_validation 47 | dependsOn: 48 | - variable_generation 49 | variables: 50 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 51 | jobs: 52 | - template: build_validation_pipeline.yml 53 | parameters: 54 | model_type: ${{ parameters.model_type }} 55 | 56 | - ${{ if ne(variables['Build.Reason'], 'PullRequest') }}: 57 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/platform_dev_pipeline.yml 58 | parameters: 59 | exec_environment: ${{ parameters.exec_environment }} 60 | model_type: ${{ parameters.model_type }} 61 | 62 | 63 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/fridge_obj_det_main_pipeline.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | none 3 | pr: 4 | branches: 5 | include: 6 | - master 7 | paths: 8 | include: 9 | - model_factory/fridge_obj_det/* 10 | - model_factory/common/devops/* 11 | 12 | parameters: 13 | - name: exec_environment 14 | displayName: "Execution Environment" 15 | default: "prod" 16 | - name: model_type 17 | displayName: "type of model to execute" 18 | default: "fridge_obj_det" 19 | - name: TARGET_CONDITION 20 | displayName: "Target Condition for the deployment" 21 | default: "tags.device='smoke-test-arm'" 22 | 23 | variables: 24 | - group: mlops_platform_${{parameters.exec_environment}}_vg 25 | - name: PIPELINE_TYPE 26 | value: ${{parameters.model_type}} 27 | 28 | stages: 29 | # Generate Variables 30 | - template: 
${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/variables_template.yml 31 | parameters: 32 | exec_environment: ${{parameters.exec_environment}} 33 | model_type: ${{parameters.model_type}} 34 | 35 | # Run Linting and Unit tests 36 | - stage: build_validation 37 | displayName: build_validation 38 | dependsOn: 39 | - variable_generation 40 | variables: 41 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 42 | jobs: 43 | - template: build_validation_pipeline.yml 44 | parameters: 45 | model_type: ${{ parameters.model_type }} 46 | 47 | - ? ${{ if and(or(eq(variables['Build.Reason'], 'PullRequest'),eq(variables['Build.Reason'], 'Manual')), ne(variables['Build.SourceBranch'], 'refs/heads/master')) }} 48 | : - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/platform_main_pipeline.yml 49 | parameters: 50 | exec_environment: "dev" 51 | model_type: ${{ parameters.model_type }} 52 | TARGET_CONDITION: ${{ parameters.TARGET_CONDITION }} 53 | 54 | - ${{ if eq(variables['Build.SourceBranch'], 'refs/heads/master') }}: 55 | - stage: build_and_push_to_prod 56 | dependsOn: 57 | - variable_generation 58 | - build_validation 59 | variables: 60 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/experiment_variables.yml 61 | jobs: 62 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/image_generation_template.yml 63 | parameters: 64 | exec_environment: ${{parameters.exec_environment}} 65 | model_type: ${{parameters.model_type}} 66 | model_name: "${{parameters.model_type}}_prod_master" 67 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/fridge_obj_det_mlops_pipeline.yml: 
-------------------------------------------------------------------------------- 1 | parameters: 2 | - name: exec_environment 3 | displayName: "Execution Environment" 4 | default: "dev" 5 | - name: model_type 6 | displayName: "type of model to execute" 7 | - name: MODEL_NAME 8 | displayName: "model name" 9 | default: $(MODEL_NAME) 10 | steps: 11 | - template: ${{variables['System.DefaultWorkingDirectory']}}/model_factory/common/devops/templates/execute_mlops_pipeline.yml 12 | parameters: 13 | script_parameter: | 14 | python -m ${{ parameters.model_type }}.mlops.src.mlops_pipeline \ 15 | --subscription_id $(SUBSCRIPTION_ID) \ 16 | --resource_group_name $(RESOURCE_GROUP_NAME) \ 17 | --workspace_name $(WORKSPACE_NAME) \ 18 | --cluster_name $(CLUSTER_NAME) \ 19 | --cluster_size $(CLUSTER_SIZE) \ 20 | --cluster_region $(CLUSTER_REGION) \ 21 | --build_reference $(BUILD.BUILDID) \ 22 | --deploy_environment ${{parameters.exec_environment}} \ 23 | --experiment_name $(EXPERIMENT_NAME) \ 24 | --display_name $(DISPLAY_NAME) \ 25 | --wait_for_completion True \ 26 | --environment_name $(ENVIRONMENT_NAME) \ 27 | --env_base_image_name $(ENV_BASE_IMAGE_NAME) \ 28 | --model_name ${{parameters.MODEL_NAME}} \ 29 | --conda_path $(CONDA_PATH) 30 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/requirements/build_validation_requirements.txt: -------------------------------------------------------------------------------- 1 | flake8-docstrings==1.6.0 2 | flake8==6.0.0 3 | pep8-naming==0.13.0 4 | pytest-cov==3.0.0 5 | pytest-azurepipelines==1.0.3 6 | pytest-mock==3.7.0 7 | pytest==7.1.2 8 | azure-ai-ml==1.9.0 9 | azure-identity==1.13.0 10 | onnx==1.14.0 11 | onnxconverter-common==1.13.0 12 | typer==0.9.0 -------------------------------------------------------------------------------- 
/telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/devops/pipelines/requirements/execute_job_requirements.txt: -------------------------------------------------------------------------------- 1 | azure-cli==2.47.0 2 | azure-ai-ml==1.9.0 3 | azure-identity==1.13.0 4 | azure-keyvault-secrets==4.7.0 5 | requests==2.31.0 6 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/environment/requirements.txt: -------------------------------------------------------------------------------- 1 | azureml-defaults -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/.gitkeep -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/__init__.py: -------------------------------------------------------------------------------- 1 | """Fridge objects AML training pipeline MLOps source.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/compare_map.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: map_comparision 3 | version: 1 4 | display_name: Compare 2 mAP values 5 | type: command 6 | inputs: 7 | map_before: 8 | type: uri_file 9 | map_after: 
10 | type: uri_file 11 | outputs: 12 | metrics_json_file: 13 | type: uri_file 14 | environment: azureml:conda-based-devenv-py38-cpu@latest 15 | code: ../../../ 16 | command: >- 17 | python fridge_obj_det/src/compare_map/compare_map.py 18 | ${{inputs.map_before}} 19 | ${{inputs.map_after}} 20 | ${{outputs.metrics_json_file}} 21 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/convert.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: convert_onnx_fp32_to_fp16 3 | version: 1 4 | display_name: convert ONNX from fp32 to fp16 5 | type: command 6 | inputs: 7 | fp32_input_dir: 8 | type: uri_folder 9 | outputs: 10 | fp16_output_dir: 11 | type: uri_folder 12 | environment: azureml:conda-based-devenv-py38-cpu@latest 13 | code: ../../../ 14 | command: >- 15 | python fridge_obj_det/src/convert/convert_fp32_to_fp16.py 16 | ${{inputs.fp32_input_dir}} 17 | ${{outputs.fp16_output_dir}} 18 | 19 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/prep.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: prepare_fridge_objects_data 3 | display_name: Download, split and register fridge objects dataset as MLTables 4 | version: 1 5 | type: command 6 | inputs: 7 | client_id: 8 | type: string 9 | client_secret: 10 | type: string 11 | tenant_id: 12 | type: string 13 | subscription_id: 14 | type: string 15 | resource_group_name: 16 | type: string 17 | workspace_name: 18 | type: string 19 | fridge_objects_uri_folder_name: 20 | type: string 21 | default: 
fridge-items-images-object-detection 22 | train_mltable_name: 23 | type: string 24 | val_mltable_name: 25 | type: string 26 | test_mltable_name: 27 | type: string 28 | outputs: 29 | train_mltable: 30 | type: mltable 31 | val_mltable: 32 | type: mltable 33 | test_mltable: 34 | type: mltable 35 | code: ../../../ 36 | environment: azureml:conda-based-devenv-py38-cpu@latest 37 | command: >- 38 | python -m fridge_obj_det.src.prep.prep 39 | --client_id ${{inputs.client_id}} 40 | --client_secret ${{inputs.client_secret}} 41 | --tenant_id ${{inputs.tenant_id}} 42 | --subscription_id ${{inputs.subscription_id}} 43 | --resource_group_name ${{inputs.resource_group_name}} 44 | --workspace_name ${{inputs.workspace_name}} 45 | --fridge_objects_uri_folder_name ${{inputs.fridge_objects_uri_folder_name}} 46 | --train_mltable_name ${{inputs.train_mltable_name}} 47 | --val_mltable_name ${{inputs.val_mltable_name}} 48 | --test_mltable_name ${{inputs.test_mltable_name}} 49 | --train_mltable ${{outputs.train_mltable}} 50 | --val_mltable ${{outputs.val_mltable}} 51 | --test_mltable ${{outputs.test_mltable}} 52 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/register.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: register_onnx_model 3 | version: 1 4 | display_name: Register an ONNX model as a custom model in the AML workspace. 
5 | type: command 6 | inputs: 7 | client_id: 8 | type: string 9 | client_secret: 10 | type: string 11 | tenant_id: 12 | type: string 13 | subscription_id: 14 | type: string 15 | resource_group_name: 16 | type: string 17 | workspace_name: 18 | type: string 19 | onnx_model_artifacts_folder: 20 | type: uri_folder 21 | registered_model_name: 22 | type: string 23 | registered_model_description: 24 | type: string 25 | build_reference_id: 26 | type: string 27 | metrics_json_file: 28 | type: uri_file 29 | environment: azureml:conda-based-devenv-py38-cpu@latest 30 | code: ../../../ 31 | command: >- 32 | python -m fridge_obj_det.src.register.register 33 | --client_id ${{inputs.client_id}} 34 | --client_secret ${{inputs.client_secret}} 35 | --tenant_id ${{inputs.tenant_id}} 36 | --subscription_id ${{inputs.subscription_id}} 37 | --resource_group_name ${{inputs.resource_group_name}} 38 | --workspace_name ${{inputs.workspace_name}} 39 | --input_model_artifacts_path ${{inputs.onnx_model_artifacts_folder}} 40 | --registered_model_name ${{inputs.registered_model_name}} 41 | --registered_model_description "${{inputs.registered_model_description}}" 42 | --build_reference ${{inputs.build_reference_id}} 43 | --metrics_json_path ${{inputs.metrics_json_file}} 44 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/score.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: test_map_score 3 | version: 1 4 | display_name: calculate mAP score 5 | type: command 6 | inputs: 7 | model_folder_path: 8 | type: uri_folder 9 | mltable_folder: 10 | type: uri_folder 11 | outputs: 12 | results_file: 13 | type: uri_file 14 | environment: azureml:conda-based-devenv-py38-cpu@latest 15 | code: ../../../ 16 | command: >- 17 | python 
fridge_obj_det/src/score/score.py 18 | ${{inputs.model_folder_path}} 19 | ${{inputs.mltable_folder}} 20 | ${{outputs.results_file}} 21 | 22 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/mlops/components/train.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: train_automl_object_detection_model 3 | display_name: Train AutoML Object Detection Model 4 | version: 1 5 | type: command 6 | inputs: 7 | client_id: 8 | type: string 9 | client_secret: 10 | type: string 11 | tenant_id: 12 | type: string 13 | subscription_id: 14 | type: string 15 | resource_group_name: 16 | type: string 17 | workspace_name: 18 | type: string 19 | training_mltable_path: 20 | type: mltable 21 | validation_mltable_path: 22 | type: mltable 23 | automl_obj_det_model_name: 24 | type: string 25 | default: fasterrcnn_resnet18_fpn 26 | automl_compute_cluster_name: 27 | type: string 28 | automl_experiment_name: 29 | type: string 30 | optional: true 31 | outputs: 32 | model_artifacts_dir: 33 | type: uri_folder 34 | code: ../../../ 35 | environment: azureml:conda-based-devenv-py38-cpu@latest 36 | command: >- 37 | python -m fridge_obj_det.src.train.train 38 | --client_id ${{inputs.client_id}} 39 | --client_secret ${{inputs.client_secret}} 40 | --tenant_id ${{inputs.tenant_id}} 41 | --subscription_id ${{inputs.subscription_id}} 42 | --resource_group_name ${{inputs.resource_group_name}} 43 | --workspace_name ${{inputs.workspace_name}} 44 | --training_mltable_path ${{inputs.training_mltable_path}} 45 | --validation_mltable_path ${{inputs.validation_mltable_path}} 46 | --automl_compute_cluster_name ${{inputs.automl_compute_cluster_name}} 47 | --automl_obj_det_model_name ${{inputs.automl_obj_det_model_name}} 48 | $[[--automl_experiment_name 
"""This script reads conda.yml file and creates an environment from it.

Run python create_devenv.py at this directory to create the environment.
This is meant to be used during development.
"""

from azure.ai.ml import MLClient
from azure.ai.ml.entities import Environment
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential


def _resolve_credential():
    """Return a working Azure credential, preferring the default chain."""
    cred = DefaultAzureCredential()
    try:
        # Probe the credential: raises when no token can be acquired.
        cred.get_token("https://management.azure.com/.default")
        return cred
    except Exception:
        # Default chain failed — fall back to an interactive browser login.
        return InteractiveBrowserCredential()


# Environment spec: base Docker image plus the local conda.yml definition.
dev_environment = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
    conda_file="conda.yml",
    name="conda-based-devenv-py38-cpu",
    description="Environment created from a Docker image plus Conda environment.",
)

ml_client = MLClient.from_config(credential=_resolve_credential())
ml_client.environments.create_or_update(dev_environment)
PYENV_ROOT="/root/.pyenv" 13 | 14 | RUN pyenv install 3.9.12 &&\ 15 | pyenv global 3.9.12 16 | 17 | ENV POETRY_HOME="/root/.poetry" 18 | ENV PATH="/root/.poetry/bin:${PATH}" 19 | 20 | RUN curl -sSL https://install.python-poetry.org | python3 - 21 | RUN poetry config virtualenvs.prefer-active-python true 22 | ENV PATH="/root/.pyenv/versions/3.9.12/bin:${PATH}" 23 | 24 | # install packages 25 | COPY pyproject.toml poetry.lock .python-version /app/ 26 | WORKDIR /app 27 | RUN poetry config installer.max-workers 10 &&\ 28 | poetry install --no-interaction --no-ansi -vvv 29 | 30 | # Setup env for the the container 31 | ENV AZUREML_MODEL_DIR=/app/azureml-models 32 | ENV MODEL_LOG_PATH=/app/logs/ 33 | ENV GUNICORN_LOG_LEVEL=debug 34 | 35 | # Create dirs 36 | RUN mkdir -p $AZUREML_MODEL_DIR &&\ 37 | mkdir -p $MODEL_LOG_PATH 38 | 39 | # Copy scoring files 40 | COPY scoring /app/scoring 41 | COPY server /app/server 42 | 43 | # Copy Model 44 | COPY model_artifacts/* /app/azureml-models/ 45 | 46 | EXPOSE 8080 47 | 48 | CMD poetry run gunicorn --timeout 600 -b=0.0.0.0:8080 --capture-output --log-level ${GUNICORN_LOG_LEVEL} server.__main__:app -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/__init__.py: -------------------------------------------------------------------------------- 1 | """TODO.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/model_artifacts/labels.json: -------------------------------------------------------------------------------- 1 | ["--bg--", "can", "carton", "milk_bottle", "water_bottle"] -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/model_artifacts/test_sample.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/mlops-model-factory-accelerator/68e9980a59dbdfe29d671c0ac253d2bb10d4cc10/telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/model_artifacts/test_sample.jpg -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fridge_obj_det", 3 | "scripts": { 4 | "version": "semantic-release" 5 | } 6 | } -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "fridge-obj-det-model" 3 | version = "0.1.0" 4 | description = "Model for detecting objects that can be placed in a fridge" 5 | authors = ["Dev "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "3.9.12" 10 | onnxruntime = "1.14.1" 11 | numpy = "1.24.2" 12 | pillow = "10.0.1" 13 | azureml-core = "1.49.0" 14 | flask = "^2.3.2" 15 | gunicorn = "20.1.0" 16 | 17 | [build-system] 18 | requires = ["poetry-core"] 19 | build-backend = "poetry.core.masonry.api" 20 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/scoring/__init__.py: -------------------------------------------------------------------------------- 1 | """TODO model package docstring.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/model/scoring/predict.py: 
"""Batch object-detection inference helpers for the fridge ONNX model."""
import onnxruntime
from typing import Dict, List
import logging


def get_batch_predictions_from_onnx(
    onnx_session: onnxruntime.InferenceSession,
    img_data_batch,
    model_img_width: int,
    model_img_height: int,
    object_class_names: List[str],
    score_threshold: float = 0.8,
) -> List[List[Dict]]:
    """Perform predictions with ONNX runtime for a batch of images.

    Returns a list for each image in img_data_batch, where each list per image
    is a list of bounding box predictions (dict) with the following structure:

    [ # list length of img_data_batch
        [ # per image list of bounding box predictions
            {
                'box': {
                    'topX': normalised top left bounding box X co-ordinate
                    'topY': normalised top left bounding box Y co-ordinate
                    'bottomX': normalised bottom right bounding box X co-ordinate
                    'bottomY': normalised bottom right bounding box Y co-ordinate
                },
                'label': bounding box class name,
                'score': bounding box confidence score
            }
        ]
    ]

    Note that bounding box co-ordinates are normalised to the range [0, 1] to allow
    scaling to the original image size (as the original image may have been resized for
    model prediction).

    Args:
        onnx_session (onnxruntime.InferenceSession): the ONNX runtime inference session
            with model loaded.
        img_data_batch (List[ndarray]): pre-processed list of images ready for prediction,
            each image should have shape CxHxW.
        model_img_width (int): ONNX model image width
        model_img_height (int): ONNX model image height
        object_class_names (List[str]): Ordered list of object class names, will map model
            prediction indices to this list to get predicted object class names.
        score_threshold (float): confidence score threshold to filter predictions.
            Defaults to 0.8.

    Returns:
        (List[List[Dict]]): List of bounding box predictions per image in
            img_data_batch.
    """
    sess_input = onnx_session.get_inputs()
    sess_output = onnx_session.get_outputs()

    output_names = [output.name for output in sess_output]

    batch_predictions = []
    for img_data in img_data_batch:
        try:
            # The model expects a batched input, hence the [img_data] wrapper.
            predictions = onnx_session.run(
                output_names=output_names, input_feed={sess_input[0].name: [img_data]}
            )
            batch_predictions.append(predictions)
        except Exception as error:
            # FIX: was `except BaseException`, which also intercepts
            # KeyboardInterrupt/SystemExit; narrowed and still re-raised.
            logging.error(
                "Error while running predictions using onnxruntime", exc_info=error
            )
            raise

    logging.info(
        "batch predictions completed, no. of predictions: %s", len(batch_predictions)
    )
    # Filter the results with threshold.
    filtered_boxes_batch = []
    for batch_sample in batch_predictions:
        # in case of retinanet change the order of boxes, labels, scores to
        # boxes, scores, labels; confirm the same from order of output_names
        boxes, labels, scores = batch_sample[0], batch_sample[1], batch_sample[2]
        bounding_boxes = _get_prediction(
            boxes,
            labels,
            scores,
            (model_img_height, model_img_width),
            object_class_names,
        )
        filtered_bounding_boxes = [
            box for box in bounding_boxes if box["score"] >= score_threshold
        ]
        filtered_boxes_batch.append(filtered_bounding_boxes)
    logging.info("No. of filtered predictions: %s", len(filtered_boxes_batch))
    return filtered_boxes_batch


def _get_box_dims(image_shape, box):
    """Normalise one bounding box's pixel corners to [0, 1] by image size."""
    box_keys = ["topX", "topY", "bottomX", "bottomY"]
    height, width = image_shape[0], image_shape[1]

    box_dims = dict(zip(box_keys, [coordinate.item() for coordinate in box]))

    box_dims["topX"] = box_dims["topX"] * 1.0 / width
    box_dims["bottomX"] = box_dims["bottomX"] * 1.0 / width
    box_dims["topY"] = box_dims["topY"] * 1.0 / height
    box_dims["bottomY"] = box_dims["bottomY"] * 1.0 / height
    return box_dims


def _get_prediction(boxes, labels, scores, image_shape, classes):
    """Combine raw boxes/labels/scores arrays into per-box prediction dicts."""
    bounding_boxes = []
    for box, label_index, score in zip(boxes, labels, scores):
        box_dims = _get_box_dims(image_shape, box)

        box_record = {
            "box": box_dims,
            "label": classes[label_index],
            "score": score.item(),
        }

        bounding_boxes.append(box_record)

    return bounding_boxes
"""Image pre-processing helpers for ONNX model inference."""
from typing import List
from PIL import Image
import numpy as np
import io


def preprocess_image_for_prediction(image: Image, height_onnx: int, width_onnx: int):
    """Perform pre-processing on raw input image.

    Transform, resize and normalize the image for expected Faster-RCNN ONNX prediction.

    Args:
        image (Image): PIL.Image loaded image.
        height_onnx (int): ONNX model expected image height.
        width_onnx (int): ONNX model expected image width.

    Returns:
        ndarray: Pre-processed image in numpy format, shape: 1xCxHxW
    """
    image = image.convert("RGB")
    image = image.resize((width_onnx, height_onnx))
    np_image = np.array(image)
    # HWC -> CHW
    np_image = np_image.transpose(2, 0, 1)  # CxHxW
    # normalize the image with per-channel ImageNet statistics
    mean_vec = np.array([0.485, 0.456, 0.406])
    std_vec = np.array([0.229, 0.224, 0.225])
    norm_img_data = np.zeros(np_image.shape).astype("float32")
    for i in range(np_image.shape[0]):
        norm_img_data[i, :, :] = (np_image[i, :, :] / 255 - mean_vec[i]) / std_vec[i]
    np_image = np.expand_dims(norm_img_data, axis=0)  # 1xCxHxW
    return np_image


def prepare_image_prediction_batch(
    batch_image_files: List[bytes],
    model_img_width: int,
    model_img_height: int,
    batch_size: int,
) -> List:
    """Pre-process a list of raw image payloads for inference.

    Transform list of images (of batch_size) into a batch that is ready to be passed into
    model prediction. The result of this function can be passed to
    get_batch_predictions_from_onnx() for inference results on each image.

    Args:
        batch_image_files (List[bytes]): raw image file contents (e.g. decoded from
            base64), length must be same as batch_size. NOTE: these are image bytes,
            not local file paths — they are opened via io.BytesIO.
        model_img_width (int): expected ONNX model input image width
        model_img_height (int): expected ONNX model input image height
        batch_size (int): batch size of images to prepare, should be equal
            to len(batch_image_files)

    Returns:
        ndarray: pre-processed image batch with leading dimension == batch_size

    Raises:
        ValueError: if batch_size is not positive or does not match
            len(batch_image_files). (FIX: the original fell through to
            `None.shape` on an empty batch and relied on `assert`, which is
            stripped under `python -O`.)
    """
    if batch_size <= 0 or batch_size != len(batch_image_files):
        raise ValueError(
            f"batch_size ({batch_size}) must be positive and equal to the number "
            f"of images provided ({len(batch_image_files)})"
        )

    img_processed_list = []
    for i in range(batch_size):
        img = Image.open(io.BytesIO(batch_image_files[i]))
        img_processed_list.append(
            preprocess_image_for_prediction(img, model_img_height, model_img_width)
        )

    if len(img_processed_list) > 1:
        img_data = np.concatenate(img_processed_list)
    else:
        img_data = img_processed_list[0]

    return img_data
"""AML-style scoring entry points (init/run) for the fridge object detection model."""
from typing import List, Tuple
import logging
import onnxruntime
import json
import os
import base64
from scoring.prepare import prepare_image_prediction_batch
from scoring.predict import get_batch_predictions_from_onnx


def _load_onnx_session(
    class_labels_json, onnx_model_path
) -> Tuple[onnxruntime.InferenceSession, List[str]]:
    """Load model ONNX inference session.

    Load ONNX model and associated class labels name list to associate model
    prediction indices into an ONNX inference session.

    Args:
        class_labels_json (str): path to local object class labels JSON file
        onnx_model_path (str): onnx model binary path

    Returns:
        Tuple[onnxruntime.InferenceSession, List[str]]: a tuple of the loaded
            ONNX inference session and the class labels list
    """
    with open(class_labels_json) as f:
        class_names = json.load(f)
    session = onnxruntime.InferenceSession(
        onnx_model_path,
        providers=['CPUExecutionProvider']
    )
    return session, class_names


def get_onnx_model_img_dims(
    onnx_session: onnxruntime.InferenceSession,
) -> Tuple[int, int]:
    """For a loaded ONNX model, get the expected image width and height to correctly perform inference.

    Args:
        onnx_session (onnxruntime.InferenceSession): loaded ONNX model

    Returns:
        Tuple[int, int]: (ONNX model image width, ONNX model image height)
    """
    # Input shape is NCHW; batch and channel dims are not needed here.
    _, _, height_onnx, width_onnx = onnx_session.get_inputs()[0].shape
    return width_onnx, height_onnx


def init():
    """Load the ONNX model and labels into module-level state for `run()`.

    Reads model artifacts from the AZUREML_MODEL_DIR directory and caches the
    inference session, class names and model input dimensions in the
    `inference_variables` global dict.
    """
    logging.info("Init started")
    classes_json_file_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "labels.json")
    onnx_file_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "model.onnx")
    onnx_session, class_names = _load_onnx_session(classes_json_file_path, onnx_file_path)
    width_onnx, height_onnx = get_onnx_model_img_dims(onnx_session)

    logging.info("Loaded models in the memory")

    # Cached globally because the AML scoring contract calls init() once and
    # run() many times.
    global inference_variables
    inference_variables = {}
    inference_variables["onnx_session"] = onnx_session
    inference_variables["class_names"] = class_names
    inference_variables["width_onnx"] = width_onnx
    inference_variables["height_onnx"] = height_onnx
    logging.info("Init complete")


def run(raw_data):
    """Score a JSON request of base64-encoded images.

    Args:
        raw_data (str | bytes): JSON payload of the form
            {"images": ["<base64 image>", ...]}.

    Returns:
        List[List[Dict]]: per-image bounding-box predictions from
            get_batch_predictions_from_onnx().
    """
    # convert base64 string to images
    logging.info("Received a request to images")

    request = json.loads(raw_data)
    prediction_image_bytes = []
    for encoded_image_data in request["images"]:
        imgdata = base64.b64decode(encoded_image_data)
        prediction_image_bytes.append(imgdata)

    batch_size = len(prediction_image_bytes)
    # FIX: use lazy %-style args instead of eager str.format for consistency
    # with the logging module conventions.
    logging.info("Request contains %s image(s) for inference", batch_size)

    # Prepare the batch of images to send to ONNX model for prediction
    predictions_img_batch = prepare_image_prediction_batch(
        batch_image_files=prediction_image_bytes,
        model_img_width=inference_variables["width_onnx"],
        model_img_height=inference_variables["height_onnx"],
        batch_size=batch_size,
    )
    logging.info("Prepared the batch of images")

    # Get the model object predictions for each image in the batch
    bbox_predictions = get_batch_predictions_from_onnx(
        onnx_session=inference_variables["onnx_session"],
        img_data_batch=predictions_img_batch,
        model_img_width=inference_variables["width_onnx"],
        model_img_height=inference_variables["height_onnx"],
        object_class_names=inference_variables["class_names"],
        score_threshold=0.8,
    )
    # FIX: original passed a "{}" placeholder with %-style lazy args, so the
    # results were never substituted and logging raised a formatting error.
    logging.info("Done with the prediction: Results are %s", bbox_predictions)

    return bbox_predictions
15 | 16 | Returns: 17 | - (JSON): {"status": "OK"} 18 | """ 19 | return jsonify({"status": "OK"}) 20 | 21 | 22 | @app.route('/score', methods=['POST']) 23 | def score(): 24 | """ 25 | Define an API endpoint to score input data. 26 | 27 | Parameters: 28 | - raw_data (bytes): The raw input data to score, sent in the request body. 29 | 30 | Returns: 31 | - (JSON): The scoring results from the `run()` function. 32 | """ 33 | return run(raw_data=request.data) 34 | 35 | 36 | if __name__ == '__main__': 37 | app.run(host='0.0.0.0', port=5001) 38 | else: 39 | gunicorn_logger = logging.getLogger('gunicorn.error') 40 | app.logger.handlers = gunicorn_logger.handlers 41 | app.logger.setLevel(gunicorn_logger.level) 42 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/src/__init__.py: -------------------------------------------------------------------------------- 1 | """Fridge objects training pipeline src code.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/src/compare_map/__init__.py: -------------------------------------------------------------------------------- 1 | """TODO fill in.""" 2 | -------------------------------------------------------------------------------- /telco_case_study_implementation/fridge_object_detection/model_factory/fridge_obj_det/src/compare_map/compare_map.py: -------------------------------------------------------------------------------- 1 | """Convert all onnx models in input_dir and downcast the weights to fp16.""" 2 | from warnings import warn 3 | from decimal import Decimal 4 | 5 | import json 6 | import typer 7 | from pathlib import Path 8 | 9 | 10 | def extract_map( 11 | results_file: Path 12 | ) -> float: 13 | """Extract mAP@0.5 from results.json file. 
"""Compare mAP metrics before and after ONNX fp16 conversion.

FIX: the original module docstring was copy-pasted from the fp16 converter
("Convert all onnx models...") and did not describe this module.
"""
from warnings import warn
from decimal import Decimal

import json
from pathlib import Path


def extract_map(
    results_file: Path
) -> float:
    """Extract mAP@0.5 from results.json file.

    Args:
        results_file (Path): Path to results file containing
            results.json file with mAP metrics.

    Returns:
        float: mAP@0.5 score
    """
    with open(results_file, 'r') as f:
        results = json.load(f)

    map_score = results['map_50']
    return map_score


def create_metrics_json(
    map_before: float,
    map_after: float,
    metrics_json_file: Path
):
    """Create metrics json file with mAP scores.

    Args:
        map_before (float): fp32 mAP score.
        map_after (float): fp16 mAP score.
        metrics_json_file (Path): Path to metrics json file for writing.
    """
    map_dict = {
        "map_onnx_fp16": map_after,
        "map_onnx_fp32": map_before}
    json_content = json.dumps(map_dict)
    with open(metrics_json_file, "w") as f:
        f.write(json_content)


def compare_scores(
    map_before: float,
    map_after: float,
    tolerance: float = 0.01,
    throws_error: bool = False,
):
    """Compare mAP before and after onnx fp16 conversion and raise ValueError or print warning if mAP drop is beyond tolerance.

    Args:
        map_before (float): mAP metric before onnx fp16 conversion.
        map_after (float): mAP metric after onnx fp16 conversion.
        tolerance (float, optional): threshold to tolerate mAP value drop.
            map_before - map_after <= tolerance will be considered as acceptable.
            Defaults to 0.01.
        throws_error (bool, optional): whether to throw error when mAP drop is
            beyond tolerance. When this is off, it will print warning instead.
            Defaults to False.

    Raises:
        ValueError: raised when throws_error is True and mAP dropped beyond tolerance
    """
    # without Decimal,
    # >>> a = 0.98
    # >>> b = 0.97
    # >>> a-b
    # 0.010000000000000009
    # this causes a - b > 0.01 to be True:
    # >>> float(Decimal(str(a)) - Decimal(str(b))) == 0.01
    # True
    map_before = Decimal(str(map_before))
    map_after = Decimal(str(map_after))
    if float(map_before - map_after) > tolerance:
        if throws_error:
            raise ValueError(
                f"mAP dropped {map_before - map_after} beyond tolerance {tolerance}."
                f" mAP before conversion: {map_before},"
                f" mAP after conversion: {map_after}"
            )
        else:
            warn(
                f"mAP dropped {map_before - map_after} beyond tolerance {tolerance}."
                f" mAP before conversion: {map_before},"
                f" mAP after conversion: {map_after}"
            )
    if 0 <= float(map_before - map_after) <= tolerance:
        print(
            f"mAP dropped {map_before - map_after} within tolerance {tolerance}."
            f" mAP before conversion: {map_before}, mAP after conversion: {map_after}"
        )
    if float(map_before - map_after) < 0:
        print(
            f"mAP increased {abs(map_before - map_after)}."
            f" mAP before conversion: {map_before}, mAP after conversion: {map_after}"
        )


def compare_map_before_and_after_conversion(
    fp32_results_file: Path,
    fp16_results_file: Path,
    metrics_json_file: Path,
    tolerance: float = 0.01,
    throws_error: bool = False,
):
    """Compare mAP before and after onnx fp16 conversion and raise ValueError or print warning if mAP drop is beyond tolerance.

    Reads metrics files from scoring steps and extracts mAP@0.5 before comparing
    them. Raises an error/prints a warning if the mAP drop is beyond tolerance,
    and writes a json file with these metrics.

    Args:
        fp32_results_file (Path): mAP metrics before onnx fp16 conversion
        fp16_results_file (Path): mAP metrics after onnx fp16 conversion
        metrics_json_file (Path): Path to metrics json file for writing.
        tolerance (float, optional): threshold to tolerate mAP value drop.
            map_before - map_after <= tolerance will be considered as acceptable.
            Defaults to 0.01.
        throws_error (bool, optional): whether to throw error when mAP drop is
            beyond tolerance. When this is off, it will print warning instead.
            Defaults to False.

    Raises:
        ValueError: raised when throws_error is True and mAP dropped beyond tolerance
    """
    map_before = extract_map(fp32_results_file)
    map_after = extract_map(fp16_results_file)

    compare_scores(map_before, map_after, tolerance, throws_error)

    create_metrics_json(map_before, map_after, metrics_json_file)


if __name__ == "__main__":
    # FIX: import the CLI dependency lazily so the module can be imported
    # (e.g. by the unit tests) without typer installed.
    import typer

    typer.run(compare_map_before_and_after_conversion)
2 | 3 | this step will eventually be integrated into one AML pipeline that is under development 4 | This is currently for testing purpose for individual AML component 5 | """ 6 | from azure.ai.ml import MLClient, load_component 7 | from azure.ai.ml.dsl import pipeline 8 | from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential 9 | 10 | compare_map_component = load_component(source="mlops/components/compare_map.yml") 11 | 12 | 13 | @pipeline( 14 | default_compute="dev-pipeline", 15 | ) 16 | def compare_map_pipeline(map_before, map_after): 17 | """Run compare_map component.""" 18 | compare_map_component(map_before=map_before, map_after=map_after) 19 | 20 | 21 | # create a pipeline 22 | # TODO: these input values will be replaced 23 | # with the actual computed values from previous steps 24 | pipeline_job = compare_map_pipeline(map_before=0.98, map_after=0.97) 25 | 26 | try: 27 | credential = DefaultAzureCredential() 28 | # Check if given credential can get token successfully. 
"""Convert all onnx models in input_dir and downcast the weights to fp16."""
from pathlib import Path
from shutil import copyfile
import onnx
from onnxconverter_common import float16
import typer


def convert_fp32_to_fp16(input_dir: Path, output_dir: Path):
    """Convert model.onnx in input_dir and downcast the weights to fp16.

    Also copies over the associated labels.json file.

    Args:
        input_dir (Path): directory that contains fp32 onnx model
        output_dir (Path): directory where downcasted fp16 onnx model is stored
            When this directory doesn't exist, target directory is created

    Raises:
        FileNotFoundError: when the expected model or labels file is missing
            under input_dir/train_artifacts.
    """
    onnx_file = Path(input_dir, 'train_artifacts/model.onnx')
    label_file = Path(input_dir, 'train_artifacts/labels.json')
    # FIX: the original message claimed the directory "had no .onnx files",
    # but only this one specific path is ever checked; say so explicitly.
    if not onnx_file.is_file():
        raise FileNotFoundError(
            f"Expected ONNX model at {onnx_file} but it does not exist."
            " Conversion process is terminated."
        )
    # FIX: fail fast on a missing labels file instead of erroring later
    # inside copyfile with a less obvious message.
    if not label_file.is_file():
        raise FileNotFoundError(
            f"Expected labels file at {label_file} but it does not exist."
            " Conversion process is terminated."
        )

    output_dir = Path(output_dir, "train_artifacts")
    output_dir.mkdir(parents=True, exist_ok=True)

    model = onnx.load(onnx_file)
    print(f"{onnx_file} will be converted to fp16")
    model_fp16 = float16.convert_float_to_float16(model)
    saved_model_path = Path(output_dir, "model.onnx")
    onnx.save(model_fp16, saved_model_path)
    print(
        "Conversion was successful and"
        f" fp16 onnx model was saved at {saved_model_path}"
    )

    # Keep the class labels alongside the converted model.
    fp16_label_file = Path(output_dir, 'labels.json')
    copyfile(label_file, fp16_label_file)


if __name__ == "__main__":
    typer.run(convert_fp32_to_fp16)
2 | 3 | this step will eventually be integrated into one AML pipeline that is under development 4 | This is currently for testing purpose for individual AML component 5 | """ 6 | from azure.ai.ml import Input, MLClient, load_component 7 | from azure.ai.ml.dsl import pipeline 8 | from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential 9 | 10 | onnx_test_ds = Input( 11 | path="azureml://datastores/workspaceblobstore/paths/onnx_test_fp32" 12 | ) 13 | 14 | convert_component = load_component(source="mlops/components/convert.yml") 15 | 16 | 17 | @pipeline( 18 | default_compute="dev-pipeline", 19 | ) 20 | def convert_pipeline(input: Input): 21 | """Run convert component.""" 22 | convert_component(fp32_input_dir=input) 23 | # this output will be used when this is integrated with other components 24 | # convert_node.outputs.fp16_output_dir 25 | 26 | 27 | # create a pipeline 28 | pipeline_job = convert_pipeline(input=onnx_test_ds) 29 | 30 | try: 31 | credential = DefaultAzureCredential() 32 | # Check if given credential can get token successfully. 
"""Pascal VOC to JSONL conversion for object detection annotations."""
import os
from xml.etree import ElementTree
import json


class JSONLConverter:
    """
    Base class for JSONL converters.

    ...
    Attributes
    ---------
    base_url : str
        the base for the image_url to be written into the jsonl file
    """

    def __init__(self, base_url: str):
        """Construct JSONLConverter.

        Args:
            base_url (str): the base for the image_url to be written into the jsonl file.
        """
        self.jsonl_data = []
        self.base_url = base_url

    def convert(self):
        """Inheriters should implement this method.

        Raises:
            NotImplementedError: when called on base class directly.
        """
        raise NotImplementedError


def write_json_lines(converter: JSONLConverter, filename: str):
    """Convert and write a JSONL file.

    Parameters:
        converter (JSONLConverter): the converter use to generate the jsonl
        filename (str): output file for writing jsonl
    """
    json_lines_data = converter.convert()
    with open(filename, "w") as outfile:
        for json_line in json_lines_data:
            json.dump(json_line, outfile, separators=(",", ":"))
            outfile.write("\n")
    print(f"Conversion completed. Converted {len(json_lines_data)} lines.")


class VOCJSONLConverter(JSONLConverter):
    """Class for converting VOC data for object detection into jsonl files."""

    def __init__(self, base_url: str, xml_dir: str):
        """Create VOCJSONLConverter.

        ...
        Attributes
        ---------
        base_url : str
            the base for the image_url to be written into the jsonl file
        xml_dir : str
            directory of xml annotation files
        """
        super().__init__(base_url=base_url)
        self.xml_dir = xml_dir

    def convert(self):
        """Generate jsonl data for object detection or instance segmentation.

        return: list of lines for jsonl
        rtype: List

        """
        for filename in os.listdir(self.xml_dir):
            if not filename.endswith(".xml"):
                # FIX: the original printed a literal placeholder instead of
                # the offending file name.
                print(f"Skipping unknown file: {filename}")
                continue

            annotation_filename = os.path.join(self.xml_dir, filename)
            print(f"Parsing {annotation_filename}")

            root = ElementTree.parse(annotation_filename).getroot()
            width = int(root.find("size/width").text)
            height = int(root.find("size/height").text)

            labels = []
            # `obj` avoids shadowing the builtin `object` (original did).
            for obj in root.findall("object"):
                name = obj.find("name").text
                is_crowd = int(obj.find("difficult").text)

                xmin = obj.find("bndbox/xmin").text
                ymin = obj.find("bndbox/ymin").text
                xmax = obj.find("bndbox/xmax").text
                ymax = obj.find("bndbox/ymax").text

                labels.append(
                    {
                        "label": name,
                        # normalise pixel corners to [0, 1] by image size
                        "topX": float(xmin) / width,
                        "topY": float(ymin) / height,
                        "bottomX": float(xmax) / width,
                        "bottomY": float(ymax) / height,
                        "isCrowd": is_crowd,
                    }
                )

            # build the jsonl record
            image_filename = root.find("filename").text
            _, file_extension = os.path.splitext(image_filename)
            # FIX: the original shallow-copied a shared template dict
            # (`dict(json_line_sample)`), so every record aliased ONE
            # "image_details" dict and all records ended up with the LAST
            # image's format/width/height. Build a fresh record per file.
            json_line = {
                "image_url": os.path.join(self.base_url, image_filename),
                "image_details": {
                    "format": file_extension[1:],
                    "width": width,
                    "height": height,
                },
                "label": labels,
            }

            self.jsonl_data.append(json_line)
        return self.jsonl_data
"""Register the ONNX model to AML workspace."""
import argparse
import json
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes
from common.mlops.get_aml_client import get_aml_client
import logging
import shutil


def main(
    client_id: str,
    client_secret: str,
    tenant_id: str,
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
    input_model_artifacts_path: str,
    registered_model_name: str,
    registered_model_description: str,
    build_reference: str,
    metrics_json_path: str
):
    """Register the ONNX model to the AML workspace.

    Args:
        client_id (str): AAD client ID.
        client_secret (str): AAD client secret.
        tenant_id (str): AAD tenant ID.
        subscription_id (str): AML subscription ID.
        resource_group_name (str): AML resource group name.
        workspace_name (str): AML workspace name.
        input_model_artifacts_path (str): the path to the input model artifacts. Should
            contain the model ONNX file and the labels.json file.
        registered_model_name (str): the name of the registered model in AML.
        registered_model_description (str): the description of the registered model in AML.
        build_reference (str): the AzDO build reference that generated the model.
        metrics_json_path (str): the path to the metrics.json file containing the mAP score
            on the test set for the ONNX FP32 model and the ONNX FP16 model.

    Raises:
        Exception: when an MLClient could not be created from the given credentials.
    """
    # Create ML Client (service-principal auth via the shared helper)
    ml_client = get_aml_client(
        client_id=client_id,
        client_secret=client_secret,
        tenant_id=tenant_id,
        subscription_id=subscription_id,
        resource_group_name=resource_group_name,
        workspace_name=workspace_name,
    )

    if ml_client is None:
        raise Exception("Could not create MLClient")

    print(f"ML Client created successfully {str(ml_client)}")

    # load the metrics file and get the mAP scores
    with open(metrics_json_path, "r") as f:
        metrics = json.load(f)
    map_onnx_fp16 = metrics["map_onnx_fp16"]
    map_onnx_fp32 = metrics["map_onnx_fp32"]

    # Bundle the artifact directory into a single .tar.gz in the current
    # working directory; the archive (not the directory) is what gets uploaded.
    compressed_model_file = shutil.make_archive(
        base_name="model_artifacts", format="gztar", root_dir=input_model_artifacts_path
    )

    # Tag the registered model with build provenance and both mAP scores so
    # they are visible in the AML model registry.
    onnx_model = Model(
        path=compressed_model_file,
        name=registered_model_name,
        description=registered_model_description,
        type=AssetTypes.CUSTOM_MODEL,
        tags={
            "build_reference": build_reference,
            "map_onnx_fp16": map_onnx_fp16,
            "map_onnx_fp32": map_onnx_fp32,
        },
    )
    registered_model = ml_client.models.create_or_update(onnx_model)
    logging.info(f"The registered model ID: {registered_model.id}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--client_id", type=str, help="Azure client id")
    parser.add_argument("--client_secret", type=str, help="Azure client secret")
    parser.add_argument("--tenant_id", type=str, help="Azure tenant id")

    parser.add_argument("--subscription_id", type=str,
                        help="Azure subscription id")
    parser.add_argument(
        "--resource_group_name", type=str, help="Azure Machine learning resource group"
    )
    parser.add_argument(
        "--workspace_name", type=str, help="Azure Machine learning Workspace name"
    )
    parser.add_argument(
        "--input_model_artifacts_path",
        type=str,
        help="The path to the input model artifacts. Should include the ONNX model and the labels.json file.",
    )
    parser.add_argument(
        "--registered_model_name",
        type=str,
        default="fridge-objects-automl-onnx",
        help="The name of the registered model.",
    )
    parser.add_argument(
        "--registered_model_description",
        type=str,
        default="Best AutoML Object Detection ONNX model for fridge objects dataset.",
        help="The description of the registered model.",
    )
    parser.add_argument(
        "--build_reference",
        type=str,
        help="Original AzDo build id that initiated experiment",
    )
    parser.add_argument(
        "--metrics_json_path",
        type=str,
        help="Path to the metrics.json file containing the mAP scores for the ONNX FP32 and FP16 models.",
    )
    args = parser.parse_args()
    main(
        client_id=args.client_id,
        client_secret=args.client_secret,
        tenant_id=args.tenant_id,
        subscription_id=args.subscription_id,
        resource_group_name=args.resource_group_name,
        workspace_name=args.workspace_name,
        input_model_artifacts_path=args.input_model_artifacts_path,
        registered_model_name=args.registered_model_name,
        registered_model_description=args.registered_model_description,
        build_reference=args.build_reference,
        metrics_json_path=args.metrics_json_path,
    )
"""Unit tests to compare two mAP values."""
from contextlib import nullcontext as does_not_raise

import pytest

from model_factory.fridge_obj_det.src.compare_map import compare_map


@pytest.mark.parametrize(
    "map_before,map_after,throws_error,expectation",
    [
        # exact same with tolerance 0.01
        (0.98, 0.97, True, does_not_raise()),
        (0.84, 0.83, True, does_not_raise()),
        # smaller than tolerance 0.01 (improved mAP)
        (0.98, 0.99, True, does_not_raise()),
        # smaller than tolerance 0.01
        (0.98, 0.975, True, does_not_raise()),
        # larger than tolerance 0.01
        (0.98, 0.96, True, pytest.raises(ValueError)),
        (0.98, 0.96, False, does_not_raise()),
        (0.98, 0.969999999999, True, pytest.raises(ValueError)),
        (0.97, 0.959999999999, True, pytest.raises(ValueError)),
    ],
)
def test_compare_map(map_before, map_after, throws_error, expectation):
    """Test compare_map_before_and_after_conversion."""
    with expectation:
        compare_map.compare_scores(
            map_before, map_after, tolerance=0.01, throws_error=throws_error
        )


def test_print():
    """Smoke test: print must run without raising and return None.

    The original version discarded the result of ``print("Hello") is None``
    (a no-op expression) and used a bare ``except:`` that would swallow
    every exception, including SystemExit/KeyboardInterrupt. Asserting the
    return value directly is both correct and lets pytest report any
    unexpected failure with a real traceback.
    """
    assert print("Hello") is None
7 | --------------------------------------------------------------------------------