├── ml_service ├── __init__.py ├── util │ ├── __init__.py │ ├── create_scoring_image.sh │ ├── manage_environment.py │ ├── attach_compute.py │ ├── create_scoring_image.py │ ├── smoke_test_scoring_service.py │ └── env_variables.py └── pipelines │ ├── __init__.py │ ├── load_sample_data.py │ ├── diabetes_regression_build_train_pipeline_with_r_on_dbricks.py │ ├── diabetes_regression_build_train_pipeline_with_r.py │ ├── run_train_pipeline.py │ ├── run_parallel_batchscore_pipeline.py │ └── diabetes_regression_build_train_pipeline.py ├── diabetes_regression ├── util │ ├── __init__.py │ └── model_helper.py ├── scoring │ ├── scoreA.py │ ├── scoreB.py │ ├── deployment_config_aci.yml │ ├── inference_config.yml │ ├── deployment_config_aks.yml │ ├── parallel_batchscore_copyoutput.py │ ├── score.py │ └── parallel_batchscore.py ├── training │ ├── R │ │ ├── train_with_r.py │ │ ├── weight_data.csv │ │ ├── train_with_r_on_databricks.py │ │ └── r_train.r │ ├── test_train.py │ ├── train.py │ └── train_aml.py ├── parameters.json ├── .amlignore ├── ci_dependencies.yml ├── conda_dependencies_scorecopy.yml ├── conda_dependencies_scoring.yml ├── conda_dependencies.yml ├── evaluate │ └── evaluate_model.py └── register │ └── register_model.py ├── environment_setup ├── tf-templates │ ├── backend.tf │ └── main.tf ├── iac-remove-environment-pipeline.yml ├── Dockerfile ├── docker-image-pipeline.yml ├── iac-create-environment-pipeline-arm.yml ├── install_requirements.sh ├── iac-create-environment-pipeline-tf.yml └── arm-templates │ └── cloud-environment.json ├── docs ├── images │ ├── deploy-aci.png │ ├── deploy-aks.png │ ├── build-connect.png │ ├── ml-lifecycle.png │ ├── scoring_image.png │ ├── trained-model.png │ ├── model-artifact.png │ ├── multi-stage-aci.png │ ├── created-resources.png │ ├── multi-stage-webapp.png │ ├── run-iac-pipeline.png │ ├── training-pipeline.png │ ├── ml-ws-svc-connection.png │ ├── model-deploy-result.png │ ├── model-train-register.png │ ├── multi-stage-aci-aks.png │ ├── 
select-iac-pipeline.png │ ├── batchscoring-ci-result.png │ ├── batchscoring-pipeline.png │ ├── library_variable_groups.png │ ├── model-deploy-configure.png │ ├── release-webapp-pipeline.PNG │ ├── model-artifact-cd-trigger.png │ ├── release-task-createimage.PNG │ ├── release-task-webappdeploy.PNG │ ├── ci-build-pipeline-configure.png │ ├── create-rm-service-connection.png │ ├── custom-container-variables.png │ ├── model-deploy-get-artifact-logs.png │ └── model-train-register-artifacts.png ├── development_setup.md ├── custom_container.md ├── canary_ab_deployment.md ├── code_description.md └── custom_model.md ├── charts ├── load_test.sh ├── abtest-model │ ├── Chart.yaml │ ├── values.yaml │ └── templates │ │ ├── service.yaml │ │ └── deployment.yaml └── abtest-istio │ ├── Chart.yaml │ ├── values.yaml │ └── templates │ └── istio-canary.yaml ├── bootstrap ├── README.md └── bootstrap.py ├── data ├── README.md ├── diabetes_missing_values.csv ├── diabetes_bad_schema.csv ├── diabetes_bad_dist.csv └── data_test.py ├── .pipelines ├── helm-install-template.yml ├── pr.yml ├── helm-upgrade-template.yml ├── code-quality-template.yml ├── diabetes_regression-ci-image.yml ├── diabetes_regression-publish-model-artifact-template.yml ├── diabetes_regression-package-model-template.yml ├── diabetes_regression-get-model-id-artifact-template.yml ├── diabetes_regression-batchscoring-ci.yml ├── diabetes_regression-ci.yml ├── diabetes_regression-variables-template.yml ├── abtest.yml └── diabetes_regression-cd.yml ├── LICENSE ├── .gitignore ├── experimentation ├── Diabetes Ridge Regression Scoring.ipynb └── Diabetes Ridge Regression Parameter Experimentation.ipynb ├── .env.example └── README.md /ml_service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ml_service/util/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /ml_service/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /diabetes_regression/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /environment_setup/tf-templates/backend.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | backend "azurerm" { 3 | } 4 | } 5 | -------------------------------------------------------------------------------- /docs/images/deploy-aci.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/deploy-aci.png -------------------------------------------------------------------------------- /docs/images/deploy-aks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/deploy-aks.png -------------------------------------------------------------------------------- /docs/images/build-connect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/build-connect.png -------------------------------------------------------------------------------- /docs/images/ml-lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/ml-lifecycle.png -------------------------------------------------------------------------------- /docs/images/scoring_image.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/scoring_image.png -------------------------------------------------------------------------------- /docs/images/trained-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/trained-model.png -------------------------------------------------------------------------------- /docs/images/model-artifact.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/model-artifact.png -------------------------------------------------------------------------------- /docs/images/multi-stage-aci.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/multi-stage-aci.png -------------------------------------------------------------------------------- /docs/images/created-resources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/created-resources.png -------------------------------------------------------------------------------- /docs/images/multi-stage-webapp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/multi-stage-webapp.png -------------------------------------------------------------------------------- /docs/images/run-iac-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/run-iac-pipeline.png -------------------------------------------------------------------------------- /docs/images/training-pipeline.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/training-pipeline.png -------------------------------------------------------------------------------- /docs/images/ml-ws-svc-connection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/ml-ws-svc-connection.png -------------------------------------------------------------------------------- /docs/images/model-deploy-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/model-deploy-result.png -------------------------------------------------------------------------------- /docs/images/model-train-register.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/model-train-register.png -------------------------------------------------------------------------------- /docs/images/multi-stage-aci-aks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/multi-stage-aci-aks.png -------------------------------------------------------------------------------- /docs/images/select-iac-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/select-iac-pipeline.png -------------------------------------------------------------------------------- /diabetes_regression/scoring/scoreA.py: -------------------------------------------------------------------------------- 1 | def init(): 2 | global model 3 | 4 | 5 | def run(raw_data): 6 | return "New Model A" 7 | 
-------------------------------------------------------------------------------- /diabetes_regression/scoring/scoreB.py: -------------------------------------------------------------------------------- 1 | def init(): 2 | global model 3 | 4 | 5 | def run(raw_data): 6 | return "New Model B" 7 | -------------------------------------------------------------------------------- /docs/images/batchscoring-ci-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/batchscoring-ci-result.png -------------------------------------------------------------------------------- /docs/images/batchscoring-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/batchscoring-pipeline.png -------------------------------------------------------------------------------- /docs/images/library_variable_groups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/library_variable_groups.png -------------------------------------------------------------------------------- /docs/images/model-deploy-configure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/model-deploy-configure.png -------------------------------------------------------------------------------- /docs/images/release-webapp-pipeline.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/release-webapp-pipeline.PNG -------------------------------------------------------------------------------- /diabetes_regression/scoring/deployment_config_aci.yml: 
-------------------------------------------------------------------------------- 1 | computeType: ACI 2 | containerResourceRequirements: 3 | cpu: 1 4 | memoryInGB: 4 5 | -------------------------------------------------------------------------------- /docs/images/model-artifact-cd-trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/model-artifact-cd-trigger.png -------------------------------------------------------------------------------- /docs/images/release-task-createimage.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/release-task-createimage.PNG -------------------------------------------------------------------------------- /docs/images/release-task-webappdeploy.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/release-task-webappdeploy.PNG -------------------------------------------------------------------------------- /charts/load_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for ((i=1;i<=$1;i++)) 4 | do 5 | curl --header "x-api-version: $3" $2 6 | echo 7 | sleep .2 8 | done -------------------------------------------------------------------------------- /docs/images/ci-build-pipeline-configure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/ci-build-pipeline-configure.png -------------------------------------------------------------------------------- /docs/images/create-rm-service-connection.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/create-rm-service-connection.png -------------------------------------------------------------------------------- /docs/images/custom-container-variables.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/custom-container-variables.png -------------------------------------------------------------------------------- /docs/images/model-deploy-get-artifact-logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/model-deploy-get-artifact-logs.png -------------------------------------------------------------------------------- /docs/images/model-train-register-artifacts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/ateliermlops/master/docs/images/model-train-register-artifacts.png -------------------------------------------------------------------------------- /charts/abtest-model/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for Kubernetes 4 | name: abtest-model 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /diabetes_regression/training/R/train_with_r.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | subprocess.check_call(["bash", "-c", "Rscript r_train.r && ls -ltr model.rds"]) 4 | -------------------------------------------------------------------------------- /charts/abtest-istio/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for Kubernetes 
4 | name: abtest-istio 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /charts/abtest-istio/values.yaml: -------------------------------------------------------------------------------- 1 | ingress: 2 | port: 80 3 | 4 | svc: 5 | port: 5001 6 | name: model-svc 7 | 8 | 9 | weight: 10 | green: 50 11 | blue: 50 12 | 13 | uri: 14 | prefix: /score 15 | 16 | -------------------------------------------------------------------------------- /bootstrap/README.md: -------------------------------------------------------------------------------- 1 | # Bootstrap from MLOpsPython repository 2 | 3 | For steps on how to use the bootstrap script, please see the "Bootstrap the project" section of the [custom model guide](../docs/custom_model.md#bootstrap-the-project). 4 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | This folder is used for example data, and it is not meant to be used for storing training data. 2 | 3 | Follow steps to [Configure Training Data](../docs/custom_model.md#Configure-Custom-Training) to use your own data for training. 
-------------------------------------------------------------------------------- /diabetes_regression/scoring/inference_config.yml: -------------------------------------------------------------------------------- 1 | entryScript: score.py 2 | runtime: python 3 | condaFile: ../conda_dependencies.yml 4 | extraDockerfileSteps: 5 | schemaFile: 6 | sourceDirectory: 7 | enableGpu: False 8 | baseImage: 9 | baseImageRegistry: -------------------------------------------------------------------------------- /charts/abtest-model/values.yaml: -------------------------------------------------------------------------------- 1 | namespace: abtesting 2 | appname: model 3 | 4 | deployment: 5 | name: model-green 6 | bluegreen: green 7 | container: 8 | name: model 9 | port: 5001 10 | 11 | svc: 12 | name: model-svc 13 | port: 5001 -------------------------------------------------------------------------------- /diabetes_regression/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "training": 3 | { 4 | "alpha": 0.4 5 | }, 6 | "evaluation": 7 | { 8 | 9 | }, 10 | "registration": 11 | { 12 | "tags": ["mse"] 13 | }, 14 | "scoring": 15 | { 16 | 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /diabetes_regression/training/R/weight_data.csv: -------------------------------------------------------------------------------- 1 | height,weight 2 | 79,174 3 | 63,250 4 | 75,223 5 | 75,130 6 | 70,120 7 | 76,239 8 | 63,129 9 | 64,185 10 | 59,246 11 | 80,241 12 | 79,217 13 | 65,212 14 | 74,242 15 | 71,223 16 | 61,167 17 | 78,148 18 | 75,229 19 | 75,116 20 | 75,182 21 | 72,237 22 | 72,160 23 | 79,169 24 | 67,219 25 | 61,202 26 | 65,168 27 | 79,181 28 | 81,214 29 | 78,216 30 | 59,245 31 | -------------------------------------------------------------------------------- /data/diabetes_missing_values.csv: -------------------------------------------------------------------------------- 1 | 
AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y 2 | ,,,0.0218723549949558,-0.0442234984244464,-0.0348207628376986,-0.0434008456520269,-0.00259226199818282,0.0199084208763183,-0.0176461251598052,151.0 3 | -0.001882016527791,-0.044641636506989,-0.0514740612388061,-0.0263278347173518,-0.00844872411121698,-0.019163339748222,0.0744115640787594,-0.0394933828740919,-0.0683297436244215,-0.09220404962683,75.0 4 | -------------------------------------------------------------------------------- /data/diabetes_bad_schema.csv: -------------------------------------------------------------------------------- 1 | SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y 2 | 0.0506801187398187,0.0616962065186885,0.0218723549949558,-0.0442234984244464,-0.0348207628376986,-0.0434008456520269,-0.00259226199818282,0.0199084208763183,-0.0176461251598052,151.0 3 | -0.04464163650698,-0.0514740612388061,-0.0263278347173518,-0.00844872411121698,-0.019163339748222,0.0744115640787594,-0.0394933828740919,-0.0683297436244215,-0.09220404962683,75.0 4 | -------------------------------------------------------------------------------- /charts/abtest-model/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: "{{ .Values.svc.name }}-{{ .Values.deployment.bluegreen }}" 5 | namespace: {{ .Values.namespace }} 6 | spec: 7 | selector: 8 | app: {{ .Values.appname }} 9 | model_version: {{ .Values.deployment.bluegreen }} 10 | ports: 11 | - port: {{ .Values.svc.port }} 12 | targetPort: {{ .Values.deployment.container.port }} 13 | -------------------------------------------------------------------------------- /data/diabetes_bad_dist.csv: -------------------------------------------------------------------------------- 1 | AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y 2 | 
100,0.0506801187398187,0.0616962065186885,0.0218723549949558,-0.0442234984244464,-0.0348207628376986,-0.0434008456520269,-0.00259226199818282,0.0199084208763183,-0.0176461251598052,151.0 3 | -0.0018820165277910,-0.044641636506989,-0.0514740612388061,-0.0263278347173518,-0.00844872411121698,-0.019163339748222,0.0744115640787594,-0.0394933828740919,-0.0683297436244215,-0.09220404962683,75.0 4 | -------------------------------------------------------------------------------- /.pipelines/helm-install-template.yml: -------------------------------------------------------------------------------- 1 | # Pipeline template for installing helm on the agent. 2 | steps: 3 | - task: Bash@3 4 | displayName: 'Install Helm $(helmVersion)' 5 | inputs: 6 | targetType: inline 7 | script: wget -q $(helmDownloadURL) -O /tmp/$FILENAME && tar -zxvf /tmp/$FILENAME -C /tmp && sudo mv /tmp/linux-amd64/helm /usr/local/bin/helm 8 | env: 9 | HELM_VERSION: $(helmVersion) 10 | FILENAME: helm-$(helmVersion)-linux-amd64.tar.gz 11 | -------------------------------------------------------------------------------- /diabetes_regression/training/R/train_with_r_on_databricks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | parser = argparse.ArgumentParser("train") 5 | parser.add_argument( 6 | "--AZUREML_SCRIPT_DIRECTORY_NAME", 7 | type=str, 8 | help="folder", 9 | ) 10 | 11 | args, unknown = parser.parse_known_args() 12 | folder = args.AZUREML_SCRIPT_DIRECTORY_NAME 13 | 14 | os.system("cd " + "/dbfs/" + folder 15 | + " && Rscript r_train.r && ls -ltr model.rds") 16 | -------------------------------------------------------------------------------- /diabetes_regression/scoring/deployment_config_aks.yml: -------------------------------------------------------------------------------- 1 | computeType: AKS 2 | autoScaler: 3 | autoscaleEnabled: True 4 | minReplicas: 1 5 | maxReplicas: 3 6 | refreshPeriodInSeconds: 10 7 | 
targetUtilization: 70 8 | authEnabled: True 9 | containerResourceRequirements: 10 | cpu: 1 11 | memoryInGB: 4 12 | appInsightsEnabled: True 13 | scoringTimeoutMs: 5000 14 | maxConcurrentRequestsPerContainer: 2 15 | maxQueueWaitMs: 5000 16 | sslEnabled: True 17 | -------------------------------------------------------------------------------- /diabetes_regression/.amlignore: -------------------------------------------------------------------------------- 1 | # To prevent unnecessary files from being included in 2 | # the snapshot, make an ignore file (.gitignore or .amlignore). 3 | # Place this file in the Snapshot directory and add the 4 | # filenames to ignore in it. The .amlignore file uses 5 | # the same syntax and patterns as the .gitignore file. 6 | # If both files exist, the .amlignore file takes precedence. 7 | 8 | # We use yml files to configure deployment, 9 | # but we are not deploying them to compute 10 | *.yml 11 | -------------------------------------------------------------------------------- /.pipelines/pr.yml: -------------------------------------------------------------------------------- 1 | # Pipeline to run basic code quality tests as part of pull requests to the master branch. 2 | 3 | resources: 4 | containers: 5 | - container: mlops 6 | image: mcr.microsoft.com/mlops/python:latest 7 | 8 | trigger: none 9 | pr: 10 | branches: 11 | include: 12 | - master 13 | 14 | pool: 15 | vmImage: 'ubuntu-latest' 16 | 17 | container: mlops 18 | 19 | variables: 20 | - template: diabetes_regression-variables-template.yml 21 | - group: devopsforai-aml-vg 22 | 23 | steps: 24 | - template: code-quality-template.yml 25 | -------------------------------------------------------------------------------- /environment_setup/iac-remove-environment-pipeline.yml: -------------------------------------------------------------------------------- 1 | # Pipeline that removes the resources created by the IaC Create Environment pipeline. 
2 | pr: none 3 | trigger: none 4 | 5 | pool: 6 | vmImage: 'ubuntu-latest' 7 | 8 | variables: 9 | - group: devopsforai-aml-vg 10 | 11 | 12 | steps: 13 | - task: AzureResourceGroupDeployment@2 14 | inputs: 15 | azureSubscription: '$(AZURE_RM_SVC_CONNECTION)' 16 | action: 'DeleteRG' 17 | resourceGroupName: '$(RESOURCE_GROUP)' 18 | location: $(LOCATION) 19 | displayName: 'Delete resources in Azure' 20 | 21 | -------------------------------------------------------------------------------- /ml_service/util/create_scoring_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -euo pipefail # strict mode, fail on error 4 | set -x # verbose 5 | 6 | docker run \ 7 | --rm \ 8 | -t \ 9 | -v $PWD:/mlops \ 10 | -v ${AZURE_CONFIG_DIR:-$HOME/.azure}:/root/.azure \ 11 | -e SUBSCRIPTION_ID=$(az account show --query id -o tsv) \ 12 | -e RESOURCE_GROUP=$RESOURCE_GROUP \ 13 | -e WORKSPACE_NAME=$WORKSPACE_NAME \ 14 | -e MODEL_NAME=$MODEL_NAME \ 15 | -e IMAGE_NAME=$IMAGE_NAME \ 16 | mcr.microsoft.com/mlops/python:latest \ 17 | bash -c "cd /mlops/ && python ml_service/util/create_scoring_image.py" 18 | -------------------------------------------------------------------------------- /.pipelines/helm-upgrade-template.yml: -------------------------------------------------------------------------------- 1 | # Pipeline template for deploying / upgrading using Helm. 
2 | parameters: 3 | chartPath: '' 4 | releaseName: '' 5 | overrideValues: '' 6 | 7 | steps: 8 | - template: helm-install-template.yml 9 | - task: HelmDeploy@0 10 | displayName: 'helm upgrade' 11 | inputs: 12 | connectionType: 'Kubernetes Service Connection' 13 | kubernetesServiceConnection: $(K8S_AB_SERVICE_CONNECTION) 14 | command: upgrade 15 | chartType: FilePath 16 | chartPath: ${{ parameters.chartPath }} 17 | releaseName: ${{ parameters.releaseName }} 18 | overrideValues: ${{ parameters.overrideValues }} 19 | install: true 20 | arguments: --namespace $(K8S_AB_NAMESPACE) 21 | -------------------------------------------------------------------------------- /environment_setup/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM conda/miniconda3 2 | 3 | LABEL org.label-schema.vendor = "Microsoft" \ 4 | org.label-schema.url = "https://hub.docker.com/r/microsoft/mlopspython" \ 5 | org.label-schema.vcs-url = "https://github.com/microsoft/MLOpsPython" 6 | 7 | COPY diabetes_regression/ci_dependencies.yml /setup/ 8 | 9 | # activate environment 10 | ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH 11 | 12 | RUN conda update -n base -c defaults conda && \ 13 | conda install python=3.7.5 && \ 14 | conda env create -f /setup/ci_dependencies.yml && \ 15 | /bin/bash -c "source activate mlopspython_ci" && \ 16 | az --version && \ 17 | chmod -R 777 /usr/local/envs/mlopspython_ci/lib/python3.7 18 | -------------------------------------------------------------------------------- /ml_service/pipelines/load_sample_data.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | from sklearn.datasets import load_diabetes 4 | 5 | 6 | # Loads the diabetes sample data from sklearn and produces a csv file that can 7 | # be used by the build/train pipeline script. 
8 | def create_sample_data_csv(file_name: str = "diabetes.csv", 9 | for_scoring: bool = False): 10 | sample_data = load_diabetes() 11 | df = pd.DataFrame( 12 | data=sample_data.data, 13 | columns=sample_data.feature_names) 14 | if not for_scoring: 15 | df['Y'] = sample_data.target 16 | # Hard code to diabetes so we fail fast if the project has been 17 | # bootstrapped. 18 | df.to_csv(file_name, index=False) 19 | -------------------------------------------------------------------------------- /diabetes_regression/ci_dependencies.yml: -------------------------------------------------------------------------------- 1 | name: mlopspython_ci 2 | 3 | dependencies: 4 | # The python interpreter version. 5 | - python=3.7.* 6 | 7 | # dependencies with versions aligned with conda_dependencies.yml. 8 | - numpy=1.18.* 9 | - pandas=1.0.* 10 | - scikit-learn=0.22.* 11 | # dependencies for MLOps with R. 12 | - r=3.6.0 13 | - r-essentials=3.6.0 14 | 15 | - conda-forge::jq 16 | - pip=20.0.* 17 | 18 | - pip: 19 | # dependencies with versions aligned with conda_dependencies.yml. 20 | - azureml-sdk 21 | 22 | # Additional pip dependencies for the CI environment. 23 | - pytest==5.4.* 24 | - pytest-cov==2.8.* 25 | - requests==2.23.* 26 | - python-dotenv==0.12.* 27 | - flake8==3.7.* 28 | - flake8_formatter_junit_xml==0.0.* 29 | - azure-cli==2.3.* 30 | -------------------------------------------------------------------------------- /diabetes_regression/training/R/r_train.r: -------------------------------------------------------------------------------- 1 | print(R.version.string) 2 | 3 | # COMMAND ---------- 4 | 5 | path="weight_data.csv" 6 | print(paste("Reading file from",path)) 7 | 8 | routes<-read.csv(path, header=TRUE) 9 | 10 | # The predictor vector (height). 11 | x <- routes$height 12 | # The response vector (weight). 13 | y <- routes$weight 14 | # Apply the lm() function. 
15 | model <- lm(y~x) 16 | 17 | # COMMAND ---------- 18 | 19 | routes 20 | 21 | # COMMAND ---------- 22 | 23 | # Make Predictions 24 | df_test_heights <- data.frame(x = as.numeric(c(115,20))) 25 | result <- predict(model,df_test_heights) 26 | print(result) 27 | 28 | # COMMAND ---------- 29 | 30 | # Save the model to blob storage 31 | model_path="model.rds" 32 | saveRDS(model, model_path) 33 | 34 | # COMMAND ---------- 35 | 36 | # View model details 37 | print(model) 38 | 39 | # COMMAND ---------- 40 | 41 | print('Completed') -------------------------------------------------------------------------------- /charts/abtest-model/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ .Values.deployment.name }} 5 | namespace: {{ .Values.namespace }} 6 | labels: 7 | app: {{ .Values.appname }} 8 | model_version: {{ .Values.deployment.bluegreen }} 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: {{ .Values.appname }} 14 | model_version: {{ .Values.deployment.bluegreen }} 15 | template: 16 | metadata: 17 | labels: 18 | app: {{ .Values.appname }} 19 | model_version: {{ .Values.deployment.bluegreen }} 20 | spec: 21 | containers: 22 | - name: {{ .Values.deployment.container.name }} 23 | image: "{{ .Values.deployment.image.name }}" 24 | imagePullPolicy: Always 25 | ports: 26 | - name: http 27 | containerPort: 5001 28 | - name: probe 29 | containerPort: 8086 30 | 31 | -------------------------------------------------------------------------------- /.pipelines/code-quality-template.yml: -------------------------------------------------------------------------------- 1 | # Pipeline template to run linting, unit tests with code coverage, and publish the results. 2 | steps: 3 | - script: | 4 | flake8 --output-file=lint-testresults.xml --format junit-xml 5 | displayName: 'Run lint tests' 6 | 7 | - script: | 8 | python -m pytest . 
--cov=diabetes_regression --cov-report=html --cov-report=xml --junitxml=unit-testresults.xml 9 | condition: succeededOrFailed() 10 | displayName: 'Run unit tests' 11 | 12 | - task: PublishTestResults@2 13 | condition: succeededOrFailed() 14 | inputs: 15 | testResultsFiles: '*-testresults.xml' 16 | testRunTitle: 'Linting & Unit tests' 17 | failTaskOnFailedTests: true 18 | displayName: 'Publish test results' 19 | 20 | - task: PublishCodeCoverageResults@1 21 | displayName: 'Publish coverage report' 22 | condition: succeededOrFailed() 23 | inputs: 24 | codeCoverageTool: Cobertura 25 | summaryFileLocation: 'coverage.xml' 26 | reportDirectory: 'htmlcov' 27 | failIfCoverageEmpty: true 28 | -------------------------------------------------------------------------------- /diabetes_regression/training/test_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from diabetes_regression.training.train import train_model, get_model_metrics 3 | 4 | 5 | def test_train_model(): 6 | X_train = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1) 7 | y_train = np.array([10, 9, 8, 8, 6, 5]) 8 | data = {"train": {"X": X_train, "y": y_train}} 9 | 10 | reg_model = train_model(data, {"alpha": 1.2}) 11 | 12 | preds = reg_model.predict([[1], [2]]) 13 | np.testing.assert_almost_equal(preds, [9.93939393939394, 9.03030303030303]) 14 | 15 | 16 | def test_get_model_metrics(): 17 | 18 | class MockModel: 19 | 20 | @staticmethod 21 | def predict(data): 22 | return ([8.12121212, 7.21212121]) 23 | 24 | X_test = np.array([3, 4]).reshape(-1, 1) 25 | y_test = np.array([8, 7]) 26 | data = {"test": {"X": X_test, "y": y_test}} 27 | 28 | metrics = get_model_metrics(MockModel(), data) 29 | 30 | assert 'mse' in metrics 31 | mse = metrics['mse'] 32 | np.testing.assert_almost_equal(mse, 0.029843893480257067) 33 | -------------------------------------------------------------------------------- /environment_setup/docker-image-pipeline.yml: 
-------------------------------------------------------------------------------- 1 | # Pipeline that builds and pushes the microsoft/mlopspython image. 2 | resources: 3 | - repo: self 4 | 5 | pool: 6 | vmImage: 'ubuntu-latest' 7 | 8 | trigger: 9 | branches: 10 | include: 11 | - master 12 | 13 | paths: 14 | include: 15 | - environment_setup/Dockerfile 16 | 17 | variables: 18 | containerRegistry: $[coalesce(variables['acrServiceConnection'], 'acrconnection')] 19 | imageName: $[coalesce(variables['agentImageName'], 'public/mlops/python')] 20 | 21 | steps: 22 | - task: Docker@2 23 | displayName: Build and Push 24 | inputs: 25 | command: buildAndPush 26 | containerRegistry: '$(containerRegistry)' 27 | repository: '$(imageName)' 28 | tags: | 29 | ${{format('build-{0}', '$(Build.BuildNumber)')}} 30 | ${{format('amlsdk-{0}', '$(amlsdkversion)')}} 31 | ${{format('release-{0}', '$(githubrelease)')}} 32 | latest 33 | buildContext: '$(Build.SourcesDirectory)' 34 | dockerFile: '$(Build.SourcesDirectory)/environment_setup/Dockerfile' 35 | -------------------------------------------------------------------------------- /.pipelines/diabetes_regression-ci-image.yml: -------------------------------------------------------------------------------- 1 | # Pipeline for building the container image that is used by other pipelines for scoring. 
2 | 3 | resources: 4 | containers: 5 | - container: mlops 6 | image: mcr.microsoft.com/mlops/python:latest 7 | 8 | pr: none 9 | trigger: 10 | branches: 11 | include: 12 | - master 13 | paths: 14 | include: 15 | - ml_service/util/create_scoring_image.py 16 | - ml_service/util/Dockerfile 17 | - diabetes_regression/scoring/ 18 | exclude: 19 | - diabetes_regression/scoring/deployment_config_aci.yml 20 | - diabetes_regression/scoring/deployment_config_aks.yml 21 | 22 | pool: 23 | vmImage: 'ubuntu-latest' 24 | 25 | container: mlops 26 | 27 | variables: 28 | - group: devopsforai-aml-vg 29 | - name: 'SCORE_SCRIPT' 30 | value: 'scoring/scoreB.py' 31 | 32 | steps: 33 | - template: diabetes_regression-package-model-template.yml 34 | parameters: 35 | modelId: $(MODEL_NAME):$(MODEL_VERSION) 36 | scoringScriptPath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/$(SCORE_SCRIPT)' 37 | condaFilePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/conda_dependencies.yml' 38 | 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /.pipelines/diabetes_regression-publish-model-artifact-template.yml: -------------------------------------------------------------------------------- 1 | # Pipeline template to check if a model was registered for the build and publishes an artifact with the model JSON 2 | steps: 3 | - task: AzureCLI@1 4 | displayName: 'Install AzureML CLI' 5 | inputs: 6 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 7 | scriptLocation: inlineScript 8 | workingDirectory: $(Build.SourcesDirectory) 9 | inlineScript: 'az extension add -n azure-cli-ml' 10 | - task: AzureCLI@1 11 | inputs: 12 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 13 | scriptLocation: inlineScript 14 | workingDirectory: $(Build.SourcesDirectory) 15 | inlineScript: | 16 | set -e # fail on error 17 | 18 | # Get the model using the build ID tag 19 | FOUND_MODEL=$(az ml model list -g $(RESOURCE_GROUP) --workspace-name $(WORKSPACE_NAME) --tag BuildId=$(Build.BuildId) --query '[0]') 20 | 21 | # If the variable is empty, print and fail 22 | [[ -z "$FOUND_MODEL" ]] && { echo "Model was not registered for this run." 
; exit 1; } 23 | 24 | # Write to a file 25 | echo $FOUND_MODEL >model.json 26 | name: 'getversion' 27 | displayName: "Determine if evaluation succeeded and new model is registered (CLI)" 28 | - publish: model.json 29 | artifact: model 30 | -------------------------------------------------------------------------------- /environment_setup/iac-create-environment-pipeline-arm.yml: -------------------------------------------------------------------------------- 1 | # CI/PR Pipeline that deploys an ARM template to create or update the resources needed by the other pipelines. 2 | trigger: 3 | branches: 4 | include: 5 | - master 6 | paths: 7 | include: 8 | - environment_setup/arm-templates/* 9 | pr: 10 | branches: 11 | include: 12 | - master 13 | paths: 14 | include: 15 | - environment_setup/arm-templates/* 16 | 17 | pool: 18 | vmImage: "ubuntu-latest" 19 | 20 | variables: 21 | - group: devopsforai-aml-vg 22 | - name: WORKSPACE_SKU # https://docs.microsoft.com/en-us/azure/machine-learning/overview-what-is-azure-ml#sku 23 | value: basic 24 | 25 | steps: 26 | - task: AzureResourceGroupDeployment@2 27 | inputs: 28 | azureSubscription: "$(AZURE_RM_SVC_CONNECTION)" 29 | action: "Create Or Update Resource Group" 30 | resourceGroupName: "$(RESOURCE_GROUP)" 31 | location: $(LOCATION) 32 | templateLocation: "Linked artifact" 33 | csmFile: "$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json" 34 | overrideParameters: "-baseName $(BASE_NAME) -location $(LOCATION) -workspace $(WORKSPACE_NAME) -sku $(WORKSPACE_SKU)" 35 | deploymentMode: "Incremental" 36 | displayName: "Deploy MLOps resources to Azure" 37 | -------------------------------------------------------------------------------- /diabetes_regression/conda_dependencies_scorecopy.yml: -------------------------------------------------------------------------------- 1 | # Conda environment specification. The dependencies defined in this file will 2 | # be automatically provisioned for managed runs. 
These include runs against 3 | # the localdocker, remotedocker, and cluster compute targets. 4 | 5 | # Note that this file is NOT used to automatically manage dependencies for the 6 | # local compute target. To provision these dependencies locally, run: 7 | # conda env update --file conda_dependencies.yml 8 | 9 | # Details about the Conda environment file format: 10 | # https://conda.io/docs/using/envs.html#create-environment-file-by-hand 11 | 12 | # For managing Spark packages and configuration, see spark_dependencies.yml. 13 | # Version of this configuration file's structure and semantics in AzureML. 14 | # This directive is stored in a comment to preserve the Conda file structure. 15 | # [AzureMlVersion] = 2 16 | 17 | # These dependencies are used to create the environment used by the batch score 18 | # copy pipeline step 19 | name: diabetes_regression_score_copy_env 20 | dependencies: 21 | # The python interpreter version. 22 | # Currently Azure ML Workbench only supports 3.5.2 and later. 23 | - python=3.7.* 24 | - pip 25 | 26 | - pip: 27 | # Base AzureML SDK 28 | - azureml-sdk==1.6.* 29 | 30 | # Score copying deps 31 | - azure-storage-blob 32 | -------------------------------------------------------------------------------- /diabetes_regression/conda_dependencies_scoring.yml: -------------------------------------------------------------------------------- 1 | # Conda environment specification. The dependencies defined in this file will 2 | # be automatically provisioned for managed runs. These include runs against 3 | # the localdocker, remotedocker, and cluster compute targets. 4 | 5 | # Note that this file is NOT used to automatically manage dependencies for the 6 | # local compute target. 
To provision these dependencies locally, run: 7 | # conda env update --file conda_dependencies.yml 8 | 9 | # Details about the Conda environment file format: 10 | # https://conda.io/docs/using/envs.html#create-environment-file-by-hand 11 | 12 | # For managing Spark packages and configuration, see spark_dependencies.yml. 13 | # Version of this configuration file's structure and semantics in AzureML. 14 | # This directive is stored in a comment to preserve the Conda file structure. 15 | # [AzureMlVersion] = 2 16 | 17 | # These dependencies are used to create the environment used by the batch score 18 | # pipeline step 19 | name: diabetes_regression_scoring_env 20 | dependencies: 21 | # The python interpreter version. 22 | # Currently Azure ML Workbench only supports 3.5.2 and later. 23 | - python=3.7.* 24 | - pip 25 | 26 | - pip: 27 | # Base AzureML SDK 28 | - azureml-sdk==1.6.* 29 | 30 | # Scoring deps 31 | - scikit-learn 32 | - pandas 33 | -------------------------------------------------------------------------------- /diabetes_regression/conda_dependencies.yml: -------------------------------------------------------------------------------- 1 | # Conda environment specification. The dependencies defined in this file will 2 | # be automatically provisioned for managed runs. These include runs against 3 | # the localdocker, remotedocker, and cluster compute targets. 4 | 5 | # Note that this file is NOT used to automatically manage dependencies for the 6 | # local compute target. To provision these dependencies locally, run: 7 | # conda env update --file conda_dependencies.yml 8 | 9 | # Details about the Conda environment file format: 10 | # https://conda.io/docs/using/envs.html#create-environment-file-by-hand 11 | 12 | # For managing Spark packages and configuration, see spark_dependencies.yml. 13 | # Version of this configuration file's structure and semantics in AzureML. 14 | # This directive is stored in a comment to preserve the Conda file structure. 
15 | # [AzureMlVersion] = 2 16 | 17 | name: diabetes_regression_training_env 18 | dependencies: 19 | # The python interpreter version. 20 | # Currently Azure ML Workbench only supports 3.5.2 and later. 21 | - python=3.7.* 22 | - pip 23 | 24 | - pip: 25 | # Base AzureML SDK 26 | - azureml-sdk 27 | 28 | # Must match AzureML SDK version. 29 | # https://docs.microsoft.com/en-us/azure/machine-learning/concept-environments 30 | - azureml-defaults 31 | 32 | # Training deps 33 | - scikit-learn 34 | 35 | # Scoring deps 36 | - inference-schema[numpy-support] 37 | 38 | # MLOps with R 39 | - azure-storage-blob 40 | -------------------------------------------------------------------------------- /ml_service/util/manage_environment.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | from azureml.core import Workspace, Environment 4 | from ml_service.util.env_variables import Env 5 | from azureml.core.runconfig import DEFAULT_CPU_IMAGE, DEFAULT_GPU_IMAGE 6 | 7 | 8 | def get_environment( 9 | workspace: Workspace, 10 | environment_name: str, 11 | conda_dependencies_file: str, 12 | create_new: bool = False, 13 | enable_docker: bool = None, 14 | use_gpu: bool = False 15 | ): 16 | try: 17 | e = Env() 18 | environments = Environment.list(workspace=workspace) 19 | restored_environment = None 20 | for env in environments: 21 | if env == environment_name: 22 | restored_environment = environments[environment_name] 23 | 24 | if restored_environment is None or create_new: 25 | new_env = Environment.from_conda_specification( 26 | environment_name, 27 | os.path.join(e.sources_directory_train, conda_dependencies_file), # NOQA: E501 28 | ) # NOQA: E501 29 | restored_environment = new_env 30 | if enable_docker is not None: 31 | restored_environment.docker.enabled = enable_docker 32 | restored_environment.docker.base_image = DEFAULT_GPU_IMAGE if use_gpu else DEFAULT_CPU_IMAGE # NOQA: E501 33 | restored_environment.register(workspace) 34 | 35 | 
if restored_environment is not None: 36 | print(restored_environment) 37 | return restored_environment 38 | except Exception as e: 39 | print(e) 40 | exit(1) 41 | -------------------------------------------------------------------------------- /.pipelines/diabetes_regression-package-model-template.yml: -------------------------------------------------------------------------------- 1 | # Pipeline template that creates a model package and adds the package location to the environment for subsequent tasks to use. 2 | parameters: 3 | - name: modelId 4 | type: string 5 | default: '' 6 | - name: scoringScriptPath 7 | type: string 8 | default: '' 9 | - name: condaFilePath 10 | type: string 11 | default: '' 12 | 13 | steps: 14 | - task: AzureCLI@1 15 | displayName: 'Install AzureML CLI' 16 | inputs: 17 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 18 | scriptLocation: inlineScript 19 | workingDirectory: $(Build.SourcesDirectory) 20 | inlineScript: 'az extension add -n azure-cli-ml' 21 | - task: AzureCLI@1 22 | displayName: 'Create model package and set IMAGE_LOCATION variable' 23 | inputs: 24 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 25 | scriptLocation: inlineScript 26 | inlineScript: | 27 | set -e # fail on error 28 | 29 | # Create model package using CLI 30 | az ml model package --workspace-name $(WORKSPACE_NAME) -g $(RESOURCE_GROUP) \ 31 | --model '${{ parameters.modelId }}' \ 32 | --entry-script '${{ parameters.scoringScriptPath }}' \ 33 | --cf '${{ parameters.condaFilePath }}' \ 34 | -v \ 35 | --rt python --query 'location' -o tsv > image_logs.txt 36 | 37 | # Show logs 38 | cat image_logs.txt 39 | 40 | # Set environment variable using the last line of logs that has the package location 41 | IMAGE_LOCATION=$(tail -n 1 image_logs.txt) 42 | echo "##vso[task.setvariable variable=IMAGE_LOCATION]$IMAGE_LOCATION" 43 | -------------------------------------------------------------------------------- /charts/abtest-istio/templates/istio-canary.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: networking.istio.io/v1alpha3 2 | kind: Gateway 3 | metadata: 4 | name: mlmodel-gateway 5 | namespace: abtesting 6 | spec: 7 | selector: 8 | istio: ingressgateway 9 | servers: 10 | - port: 11 | number: {{ .Values.ingress.port }} 12 | name: http 13 | protocol: HTTP 14 | hosts: 15 | - "*" 16 | --- 17 | apiVersion: networking.istio.io/v1alpha3 18 | kind: VirtualService 19 | metadata: 20 | name: mlmodel-virtualservice 21 | namespace: abtesting 22 | spec: 23 | gateways: 24 | - mlmodel-gateway 25 | hosts: 26 | - '*' 27 | http: 28 | - match: 29 | - uri: 30 | prefix: /score 31 | headers: 32 | x-api-version: 33 | exact: 'blue' 34 | route: 35 | - destination: 36 | host: {{ .Values.svc.name }}-blue.abtesting.svc.cluster.local 37 | port: 38 | number: {{ .Values.svc.port }} 39 | - match: 40 | - uri: 41 | prefix: /score 42 | headers: 43 | x-api-version: 44 | exact: 'green' 45 | route: 46 | - destination: 47 | host: {{ .Values.svc.name }}-green.abtesting.svc.cluster.local 48 | port: 49 | number: {{ .Values.svc.port }} 50 | - route: 51 | - destination: 52 | host: {{ .Values.svc.name }}-green.abtesting.svc.cluster.local 53 | port: 54 | number: {{ .Values.svc.port }} 55 | weight: {{ .Values.weight.green }} 56 | - destination: 57 | host: {{ .Values.svc.name }}-blue.abtesting.svc.cluster.local 58 | port: 59 | number: {{ .Values.svc.port }} 60 | weight: {{ .Values.weight.blue }} -------------------------------------------------------------------------------- /environment_setup/install_requirements.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) Microsoft Corporation. All rights reserved.​ 4 | # ​ 5 | # Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, 6 | # royalty-free right to use, copy, and modify the software code provided by us 7 | # ('Software Code'). 
You may not sublicense the Software Code or any use of it 8 | # (except to your affiliates and to vendors to perform work on your behalf) 9 | # through distribution, network access, service agreement, lease, rental, or 10 | # otherwise. This license does not purport to express any claim of ownership over 11 | # data you may have shared with Microsoft in the creation of the Software Code. 12 | # Unless applicable law gives you more rights, Microsoft reserves all other 13 | # rights not expressly granted herein, whether by implication, estoppel or 14 | # otherwise. ​ 15 | # ​ 16 | # THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | # MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 23 | # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | # ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE 25 | # POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | set -eux 28 | 29 | conda env create -f diabetes_regression/ci_dependencies.yml 30 | 31 | conda activate mlopspython_ci 32 | -------------------------------------------------------------------------------- /.pipelines/diabetes_regression-get-model-id-artifact-template.yml: -------------------------------------------------------------------------------- 1 | # Pipeline template that gets the model name and version from a previous build's artifact 2 | 3 | parameters: 4 | - name: projectId 5 | type: string 6 | default: '' 7 | - name: pipelineId 8 | type: string 9 | default: '' 10 | - name: artifactBuildId 11 | type: string 12 | default: latest 13 | 14 | steps: 15 | - download: none 16 | - task: DownloadPipelineArtifact@2 17 | displayName: Download Pipeline Artifacts 18 | inputs: 19 | source: 'specific' 20 | project: '${{ parameters.projectId }}' 21 | pipeline: '${{ parameters.pipelineId }}' 22 | preferTriggeringPipeline: true 23 | ${{ if eq(parameters.artifactBuildId, 'latest') }}: 24 | buildVersionToDownload: 'latestFromBranch' 25 | ${{ if ne(parameters.artifactBuildId, 'latest') }}: 26 | buildVersionToDownload: 'specific' 27 | runId: '${{ parameters.artifactBuildId }}' 28 | runBranch: '$(Build.SourceBranch)' 29 | path: $(Build.SourcesDirectory)/bin 30 | - task: Bash@3 31 | name: get_model 32 | displayName: Parse Json for Model Name and Version 33 | inputs: 34 | targetType: 'inline' 35 | script: | 36 | # Print JSON 37 | cat $(Build.SourcesDirectory)/bin/model/model.json | jq '.' 
38 | 39 | # Set model name and version variables 40 | MODEL_NAME=$(jq -r '.name' <$(Build.SourcesDirectory)/bin/model/model.json) 41 | MODEL_VERSION=$(jq -r '.version' <$(Build.SourcesDirectory)/bin/model/model.json) 42 | 43 | echo "Model Name: $MODEL_NAME" 44 | echo "Model Version: $MODEL_VERSION" 45 | 46 | # Set environment variables 47 | echo "##vso[task.setvariable variable=MODEL_VERSION;isOutput=true]$MODEL_VERSION" 48 | echo "##vso[task.setvariable variable=MODEL_NAME;isOutput=true]$MODEL_NAME" 49 | -------------------------------------------------------------------------------- /docs/development_setup.md: -------------------------------------------------------------------------------- 1 | ## Development environment setup 2 | 3 | ### Setup 4 | 5 | Please be aware that the local environment also needs access to the Azure subscription so you have to have Contributor access on the Azure ML Workspace. 6 | 7 | In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements. 8 | 9 | ### Installation 10 | 11 | [Install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). The Azure CLI will be used to log you in interactively. 12 | 13 | Create a virtual environment using [venv](https://docs.python.org/3/library/venv.html), [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) or [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv). 14 | 15 | Here is an example for setting up and activating a `venv` environment with Python 3: 16 | 17 | ``` 18 | python3 -mvenv .venv 19 | source .venv/bin/activate 20 | ``` 21 | 22 | Install the required Python modules in your virtual environment. 
23 | 24 | ``` 25 | pip install -r environment_setup/requirements.txt 26 | ``` 27 | 28 | ### Running local code 29 | 30 | To run your local ML pipeline code on Azure ML, run a command such as the following (in bash, all on one line): 31 | 32 | ``` 33 | export BUILD_BUILDID=$(uuidgen); python ml_service/pipelines/build_train_pipeline.py && python ml_service/pipelines/run_train_pipeline.py 34 | ``` 35 | 36 | BUILD_BUILDID is a variable used to uniquely identify the ML pipeline between the 37 | `build_train_pipeline.py` and `run_train_pipeline.py` scripts. In Azure DevOps it is 38 | set to the current build number. In a local environment, we can use a command such as 39 | `uuidgen` so set a different random identifier on each run, ensuring there are 40 | no collisions. 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | venv/ 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | *-testresults.xml 51 | test-output.xml 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | *.vscode 96 | condaenv.* 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | .DS_Store 112 | -------------------------------------------------------------------------------- /ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py: -------------------------------------------------------------------------------- 1 | from azureml.pipeline.core import Pipeline 2 | from azureml.core import Workspace 3 | from ml_service.util.attach_compute import get_compute 4 | from azureml.pipeline.steps import DatabricksStep 5 | from ml_service.util.env_variables import Env 6 | 7 | 8 | def main(): 9 | e = Env() 10 | # Get Azure machine learning workspace 11 | aml_workspace = Workspace.get( 12 | name=e.workspace_name, 13 | subscription_id=e.subscription_id, 14 | resource_group=e.resource_group 15 | ) 16 | print("get_workspace:") 17 | 
print(aml_workspace) 18 | 19 | # Get Azure machine learning cluster 20 | aml_compute = get_compute( 21 | aml_workspace, 22 | e.compute_name, 23 | e.vm_size) 24 | if aml_compute is not None: 25 | print("aml_compute:") 26 | print(aml_compute) 27 | 28 | train_step = DatabricksStep( 29 | name="DBPythonInLocalMachine", 30 | num_workers=1, 31 | python_script_name="train_with_r_on_databricks.py", 32 | source_directory="diabetes_regression/training/R", 33 | run_name='DB_Python_R_demo', 34 | existing_cluster_id=e.db_cluster_id, 35 | compute_target=aml_compute, 36 | allow_reuse=False 37 | ) 38 | 39 | print("Step Train created") 40 | 41 | steps = [train_step] 42 | 43 | train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) 44 | train_pipeline.validate() 45 | published_pipeline = train_pipeline.publish( 46 | name=e.pipeline_name + "_with_R_on_DB", 47 | description="Model training/retraining pipeline", 48 | version=e.build_id 49 | ) 50 | print(f'Published pipeline: {published_pipeline.name}') 51 | print(f'for build {published_pipeline.version}') 52 | 53 | 54 | if __name__ == '__main__': 55 | main() 56 | -------------------------------------------------------------------------------- /ml_service/util/attach_compute.py: -------------------------------------------------------------------------------- 1 | 2 | from azureml.core import Workspace 3 | from azureml.core.compute import AmlCompute 4 | from azureml.core.compute import ComputeTarget 5 | from azureml.exceptions import ComputeTargetException 6 | from ml_service.util.env_variables import Env 7 | 8 | 9 | def get_compute(workspace: Workspace, compute_name: str, vm_size: str, for_batch_scoring: bool = False): # NOQA E501 10 | try: 11 | if compute_name in workspace.compute_targets: 12 | compute_target = workspace.compute_targets[compute_name] 13 | if compute_target and type(compute_target) is AmlCompute: 14 | print("Found existing compute target " + compute_name + " so using it.") # NOQA 15 | else: 16 | e = Env() 17 | 
compute_config = AmlCompute.provisioning_configuration( 18 | vm_size=vm_size, 19 | vm_priority=e.vm_priority if not for_batch_scoring else e.vm_priority_scoring, # NOQA E501 20 | min_nodes=e.min_nodes if not for_batch_scoring else e.min_nodes_scoring, # NOQA E501 21 | max_nodes=e.max_nodes if not for_batch_scoring else e.max_nodes_scoring, # NOQA E501 22 | idle_seconds_before_scaledown="300" 23 | # #Uncomment the below lines for VNet support 24 | # vnet_resourcegroup_name=vnet_resourcegroup_name, 25 | # vnet_name=vnet_name, 26 | # subnet_name=subnet_name 27 | ) 28 | compute_target = ComputeTarget.create( 29 | workspace, compute_name, compute_config 30 | ) 31 | compute_target.wait_for_completion( 32 | show_output=True, min_node_count=None, timeout_in_minutes=10 33 | ) 34 | return compute_target 35 | except ComputeTargetException as ex: 36 | print(ex) 37 | print("An error occurred trying to provision compute.") 38 | exit(1) 39 | -------------------------------------------------------------------------------- /ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py: -------------------------------------------------------------------------------- 1 | from azureml.pipeline.steps import PythonScriptStep 2 | from azureml.pipeline.core import Pipeline 3 | from azureml.core import Workspace 4 | from azureml.core.runconfig import RunConfiguration 5 | from ml_service.util.attach_compute import get_compute 6 | from ml_service.util.env_variables import Env 7 | from ml_service.util.manage_environment import get_environment 8 | 9 | 10 | def main(): 11 | e = Env() 12 | # Get Azure machine learning workspace 13 | aml_workspace = Workspace.get( 14 | name=e.workspace_name, 15 | subscription_id=e.subscription_id, 16 | resource_group=e.resource_group, 17 | ) 18 | print("get_workspace:") 19 | print(aml_workspace) 20 | 21 | # Get Azure machine learning cluster 22 | aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size) 23 | if aml_compute is not None: 24 | 
print("aml_compute:") 25 | print(aml_compute) 26 | 27 | # Create a reusable Azure ML environment 28 | # Make sure to include `r-essentials' 29 | # in diabetes_regression/conda_dependencies.yml 30 | environment = get_environment( 31 | aml_workspace, 32 | e.aml_env_name, 33 | conda_dependencies_file=e.aml_env_train_conda_dep_file, 34 | create_new=e.rebuild_env, 35 | ) # NOQA: E501 36 | run_config = RunConfiguration() 37 | run_config.environment = environment 38 | 39 | train_step = PythonScriptStep( 40 | name="Train Model", 41 | script_name="train_with_r.py", 42 | compute_target=aml_compute, 43 | source_directory="diabetes_regression/training/R", 44 | runconfig=run_config, 45 | allow_reuse=False, 46 | ) 47 | print("Step Train created") 48 | 49 | steps = [train_step] 50 | 51 | train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) 52 | train_pipeline.validate() 53 | published_pipeline = train_pipeline.publish( 54 | name=e.pipeline_name, 55 | description="Model training/retraining pipeline", 56 | version=e.build_id, 57 | ) 58 | print(f"Published pipeline: {published_pipeline.name}") 59 | print(f"for build {published_pipeline.version}") 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /ml_service/util/create_scoring_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from azureml.core import Workspace 4 | from azureml.core.environment import Environment 5 | from azureml.core.model import Model, InferenceConfig 6 | import shutil 7 | from ml_service.util.env_variables import Env 8 | 9 | e = Env() 10 | 11 | # Get Azure machine learning workspace 12 | ws = Workspace.get( 13 | name=e.workspace_name, 14 | subscription_id=e.subscription_id, 15 | resource_group=e.resource_group 16 | ) 17 | 18 | parser = argparse.ArgumentParser("create scoring image") 19 | parser.add_argument( 20 | 
"--output_image_location_file", 21 | type=str, 22 | help=("Name of a file to write image location to, " 23 | "in format REGISTRY.azurecr.io/IMAGE_NAME:IMAGE_VERSION") 24 | ) 25 | args = parser.parse_args() 26 | 27 | model = Model(ws, name=e.model_name, version=e.model_version) 28 | sources_dir = e.sources_directory_train 29 | if (sources_dir is None): 30 | sources_dir = 'diabetes_regression' 31 | score_script = os.path.join(".", sources_dir, e.score_script) 32 | score_file = os.path.basename(score_script) 33 | path_to_scoring = os.path.dirname(score_script) 34 | cwd = os.getcwd() 35 | # Copy conda_dependencies.yml into scoring as this method does not accept relative paths. # NOQA: E501 36 | shutil.copy(os.path.join(".", sources_dir, 37 | "conda_dependencies.yml"), path_to_scoring) 38 | os.chdir(path_to_scoring) 39 | 40 | scoring_env = Environment.from_conda_specification(name="scoringenv", file_path="conda_dependencies.yml") # NOQA: E501 41 | inference_config = InferenceConfig( 42 | entry_script=score_file, environment=scoring_env) 43 | package = Model.package(ws, [model], inference_config) 44 | package.wait_for_creation(show_output=True) 45 | # Display the package location/ACR path 46 | print(package.location) 47 | 48 | os.chdir(cwd) 49 | 50 | if package.state != "Succeeded": 51 | raise Exception("Image creation status: {package.creation_state}") 52 | 53 | print("Package stored at {} with build log {}".format(package.location, package.package_build_log_uri)) # NOQA: E501 54 | 55 | # Save the Image Location for other AzDO jobs after script is complete 56 | if args.output_image_location_file is not None: 57 | print("Writing image location to %s" % args.output_image_location_file) 58 | with open(args.output_image_location_file, "w") as out_file: 59 | out_file.write(str(package.location)) 60 | -------------------------------------------------------------------------------- /ml_service/pipelines/run_train_pipeline.py: 
-------------------------------------------------------------------------------- 1 | from azureml.pipeline.core import PublishedPipeline 2 | from azureml.core import Experiment, Workspace 3 | import argparse 4 | from ml_service.util.env_variables import Env 5 | 6 | 7 | def main(): 8 | 9 | parser = argparse.ArgumentParser("register") 10 | parser.add_argument( 11 | "--output_pipeline_id_file", 12 | type=str, 13 | default="pipeline_id.txt", 14 | help="Name of a file to write pipeline ID to" 15 | ) 16 | parser.add_argument( 17 | "--skip_train_execution", 18 | action="store_true", 19 | help=("Do not trigger the execution. " 20 | "Use this in Azure DevOps when using a server job to trigger") 21 | ) 22 | args = parser.parse_args() 23 | 24 | e = Env() 25 | 26 | aml_workspace = Workspace.get( 27 | name=e.workspace_name, 28 | subscription_id=e.subscription_id, 29 | resource_group=e.resource_group 30 | ) 31 | 32 | # Find the pipeline that was published by the specified build ID 33 | pipelines = PublishedPipeline.list(aml_workspace) 34 | matched_pipes = [] 35 | 36 | for p in pipelines: 37 | if p.name == e.pipeline_name: 38 | if p.version == e.build_id: 39 | matched_pipes.append(p) 40 | 41 | if(len(matched_pipes) > 1): 42 | published_pipeline = None 43 | raise Exception(f"Multiple active pipelines are published for build {e.build_id}.") # NOQA: E501 44 | elif(len(matched_pipes) == 0): 45 | published_pipeline = None 46 | raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}") # NOQA: E501 47 | else: 48 | published_pipeline = matched_pipes[0] 49 | print("published pipeline id is", published_pipeline.id) 50 | 51 | # Save the Pipeline ID for other AzDO jobs after script is complete 52 | if args.output_pipeline_id_file is not None: 53 | with open(args.output_pipeline_id_file, "w") as out_file: 54 | out_file.write(published_pipeline.id) 55 | 56 | if(args.skip_train_execution is False): 57 | pipeline_parameters = {"model_name": e.model_name} 58 | tags = 
{"BuildId": e.build_id} 59 | if (e.build_uri is not None): 60 | tags["BuildUri"] = e.build_uri 61 | experiment = Experiment( 62 | workspace=aml_workspace, 63 | name=e.experiment_name) 64 | run = experiment.submit( 65 | published_pipeline, 66 | tags=tags, 67 | pipeline_parameters=pipeline_parameters) 68 | 69 | print("Pipeline run initiated ", run.id) 70 | 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /diabetes_regression/util/model_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | model_helper.py 3 | """ 4 | from azureml.core import Run 5 | from azureml.core import Workspace 6 | from azureml.core.model import Model as AMLModel 7 | 8 | 9 | def get_current_workspace() -> Workspace: 10 | """ 11 | Retrieves and returns the current workspace. 12 | Will not work when ran locally. 13 | 14 | Parameters: 15 | None 16 | 17 | Return: 18 | The current workspace. 19 | """ 20 | run = Run.get_context(allow_offline=False) 21 | experiment = run.experiment 22 | return experiment.workspace 23 | 24 | 25 | def get_model( 26 | model_name: str, 27 | model_version: int = None, # If none, return latest model 28 | tag_name: str = None, 29 | tag_value: str = None, 30 | aml_workspace: Workspace = None 31 | ) -> AMLModel: 32 | """ 33 | Retrieves and returns a model from the workspace by its name 34 | and (optional) tag. 35 | 36 | Parameters: 37 | aml_workspace (Workspace): aml.core Workspace that the model lives. 38 | model_name (str): name of the model we are looking for 39 | (optional) model_version (str): model version. Latest if not provided. 40 | (optional) tag (str): the tag value & name the model was registered under. 41 | 42 | Return: 43 | A single aml model from the workspace that matches the name and tag, or 44 | None. 
45 | """ 46 | if aml_workspace is None: 47 | print("No workspace defined - using current experiment workspace.") 48 | aml_workspace = get_current_workspace() 49 | 50 | tags = None 51 | if tag_name is not None or tag_value is not None: 52 | # Both a name and value must be specified to use tags. 53 | if tag_name is None or tag_value is None: 54 | raise ValueError( 55 | "model_tag_name and model_tag_value should both be supplied" 56 | + "or excluded" # NOQA: E501 57 | ) 58 | tags = [[tag_name, tag_value]] 59 | 60 | model = None 61 | if model_version is not None: 62 | # TODO(tcare): Finding a specific version currently expects exceptions 63 | # to propagate in the case we can't find the model. This call may 64 | # result in a WebserviceException that may or may not be due to the 65 | # model not existing. 66 | model = AMLModel( 67 | aml_workspace, 68 | name=model_name, 69 | version=model_version, 70 | tags=tags) 71 | else: 72 | models = AMLModel.list( 73 | aml_workspace, name=model_name, tags=tags, latest=True) 74 | if len(models) == 1: 75 | model = models[0] 76 | elif len(models) > 1: 77 | raise Exception("Expected only one model") 78 | 79 | return model 80 | -------------------------------------------------------------------------------- /environment_setup/tf-templates/main.tf: -------------------------------------------------------------------------------- 1 | provider "azurerm" { 2 | version = "=2.3.0" 3 | features {} 4 | } 5 | 6 | variable BASE_NAME {} 7 | variable RESOURCE_GROUP {} 8 | variable WORKSPACE_NAME {} 9 | 10 | #-------------------------------------------------------------------------------- 11 | 12 | #Set the already-existing resource group 13 | data "azurerm_resource_group" "amlrg" { 14 | name = var.RESOURCE_GROUP 15 | } 16 | 17 | #Set client config for a.o. 
tenant id 18 | data "azurerm_client_config" "currentconfig" { 19 | } 20 | 21 | #-------------------------------------------------------------------------------- 22 | 23 | # Storage account for AML Service 24 | resource "azurerm_storage_account" "amlstor" { 25 | name = "${var.BASE_NAME}amlsa" 26 | location = data.azurerm_resource_group.amlrg.location 27 | resource_group_name = data.azurerm_resource_group.amlrg.name 28 | account_tier = "Standard" 29 | account_replication_type = "LRS" 30 | } 31 | 32 | # Keyvault for AML Service 33 | resource "azurerm_key_vault" "amlkv" { 34 | name = "${var.BASE_NAME}-AML-KV" 35 | location = data.azurerm_resource_group.amlrg.location 36 | resource_group_name = data.azurerm_resource_group.amlrg.name 37 | tenant_id = data.azurerm_client_config.currentconfig.tenant_id 38 | sku_name = "standard" 39 | } 40 | 41 | # App Insights for AML Service 42 | resource "azurerm_application_insights" "amlai" { 43 | name = "${var.BASE_NAME}-AML-AI" 44 | location = data.azurerm_resource_group.amlrg.location 45 | resource_group_name = data.azurerm_resource_group.amlrg.name 46 | application_type = "web" 47 | } 48 | 49 | # Container registry for AML Service 50 | resource "azurerm_container_registry" "amlacr" { 51 | name = "${var.BASE_NAME}amlcr" 52 | resource_group_name = data.azurerm_resource_group.amlrg.name 53 | location = data.azurerm_resource_group.amlrg.location 54 | sku = "Standard" 55 | admin_enabled = true 56 | } 57 | 58 | # ML Workspace for AML Service, depending on the storage account, Keyvault, App Insights and ACR. 
59 | resource "azurerm_machine_learning_workspace" "amlws" { 60 | name = var.WORKSPACE_NAME 61 | location = data.azurerm_resource_group.amlrg.location 62 | resource_group_name = data.azurerm_resource_group.amlrg.name 63 | application_insights_id = azurerm_application_insights.amlai.id 64 | key_vault_id = azurerm_key_vault.amlkv.id 65 | storage_account_id = azurerm_storage_account.amlstor.id 66 | container_registry_id = azurerm_container_registry.amlacr.id 67 | 68 | identity { 69 | type = "SystemAssigned" 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /ml_service/util/smoke_test_scoring_service.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import requests 3 | import time 4 | from azureml.core import Workspace 5 | from azureml.core.webservice import AksWebservice, AciWebservice 6 | from ml_service.util.env_variables import Env 7 | import secrets 8 | 9 | 10 | input = {"data": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 11 | [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]]} 12 | output_len = 2 13 | 14 | 15 | def call_web_service(e, service_type, service_name): 16 | aml_workspace = Workspace.get( 17 | name=e.workspace_name, 18 | subscription_id=e.subscription_id, 19 | resource_group=e.resource_group 20 | ) 21 | print("Fetching service") 22 | headers = {} 23 | if service_type == "ACI": 24 | service = AciWebservice(aml_workspace, service_name) 25 | else: 26 | service = AksWebservice(aml_workspace, service_name) 27 | if service.auth_enabled: 28 | service_keys = service.get_keys() 29 | headers['Authorization'] = 'Bearer ' + service_keys[0] 30 | print("Testing service") 31 | print(". url: %s" % service.scoring_uri) 32 | output = call_web_app(service.scoring_uri, headers) 33 | 34 | return output 35 | 36 | 37 | def call_web_app(url, headers): 38 | 39 | # Generate an HTTP 'traceparent' distributed tracing header 40 | # (per the W3C Trace Context proposed specification). 
def main():
    """Smoke-test entry point.

    Parses the service type and name from the command line, calls the
    deployed scoring endpoint, and asserts the response has the expected
    shape (an AssertionError makes the pipeline step fail).
    """
    arg_parser = argparse.ArgumentParser("smoke_test_scoring_service.py")
    arg_parser.add_argument(
        "--type",
        type=str,
        choices=["AKS", "ACI", "Webapp"],
        required=True,
        help="type of service"
    )
    arg_parser.add_argument(
        "--service",
        type=str,
        required=True,
        help="Name of the image to test"
    )
    cli_args = arg_parser.parse_args()

    env = Env()
    # A plain web app is called directly by URL; AKS/ACI services are
    # resolved through the AML workspace first.
    if cli_args.type == "Webapp":
        scored = call_web_app(cli_args.service, {})
    else:
        scored = call_web_service(env, cli_args.type, cli_args.service)

    print("Verifying service output")
    assert "result" in scored
    assert len(scored["result"]) == output_len
    print("Smoke test successful.")


if __name__ == '__main__':
    main()
2 | trigger: 3 | branches: 4 | include: 5 | - master 6 | paths: 7 | include: 8 | - environment_setup/tf-templates/* 9 | pr: 10 | branches: 11 | include: 12 | - master 13 | paths: 14 | include: 15 | - environment_setup/tf-templates/* 16 | 17 | pool: 18 | vmImage: 'ubuntu-latest' 19 | 20 | variables: 21 | - group: devopsforai-aml-vg 22 | 23 | steps: 24 | - task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-installer.TerraformInstaller@0 25 | displayName: 'Use Terraform 0.12.24' 26 | inputs: 27 | terraformVersion: 0.12.24 28 | 29 | - task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-cli.TerraformCLI@0 30 | displayName: 'TF init - Deploy MLOps resources to Azure' 31 | inputs: 32 | command: init 33 | commandOptions: '-backend=true -backend-config=$(Build.SourcesDirectory)/environment_setup/tf-templates/backend.tf' 34 | workingDirectory: '$(Build.SourcesDirectory)/environment_setup/tf-templates' 35 | backendType: azurerm 36 | backendServiceArm: $(AZURE_RM_SVC_CONNECTION) 37 | ensureBackend: true 38 | backendAzureRmResourceGroupLocation: $(LOCATION) 39 | backendAzureRmResourceGroupName: $(RESOURCE_GROUP) 40 | backendAzureRmStorageAccountName: '$(BASE_NAME)statestor' 41 | backendAzureRmStorageAccountSku: 'Standard_LRS' 42 | backendAzureRmContainerName: 'tfstate-cont' 43 | backendAzureRmKey: 'mlopsinfra.tfstate' 44 | 45 | - task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-cli.TerraformCLI@0 46 | displayName: 'TF validate - Deploy MLOps resources to Azure' 47 | inputs: 48 | command: validate 49 | workingDirectory: '$(Build.SourcesDirectory)/environment_setup/tf-templates' 50 | 51 | - task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-cli.TerraformCLI@0 52 | displayName: 'TF plan - Deploy MLOps resources to Azure' 53 | inputs: 54 | command: plan 55 | workingDirectory: '$(Build.SourcesDirectory)/environment_setup/tf-templates' 56 | 
environmentServiceName: $(AZURE_RM_SVC_CONNECTION) 57 | env: 58 | TF_VAR_BASE_NAME: $(BASE_NAME) 59 | TF_VAR_RESOURCE_GROUP: $(RESOURCE_GROUP) 60 | TF_VAR_WORKSPACE_NAME: $(WORKSPACE_NAME) 61 | 62 | - task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-cli.TerraformCLI@0 63 | displayName: 'TF apply - Deploy MLOps resources to Azure' 64 | inputs: 65 | command: apply 66 | workingDirectory: '$(Build.SourcesDirectory)/environment_setup/tf-templates' 67 | environmentServiceName: $(AZURE_RM_SVC_CONNECTION) 68 | env: 69 | TF_VAR_BASE_NAME: $(BASE_NAME) 70 | TF_VAR_RESOURCE_GROUP: $(RESOURCE_GROUP) 71 | TF_VAR_WORKSPACE_NAME: $(WORKSPACE_NAME) 72 | 73 | -------------------------------------------------------------------------------- /experimentation/Diabetes Ridge Regression Scoring.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Score Data with a Ridge Regression Model Trained on the Diabetes Dataset" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This notebook loads the model trained in the Diabetes Ridge Regression Training notebook, prepares the data, and scores the data." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import json\n", 24 | "import numpy\n", 25 | "from azureml.core.model import Model\n", 26 | "import joblib" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## Load Model" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "model_path = Model.get_model_path(model_name=\"sklearn_regression_model.pkl\")\n", 43 | "model = joblib.load(model_path)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Prepare Data" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "raw_data = '{\"data\":[[1,2,3,4,5,6,7,8,9,10],[10,9,8,7,6,5,4,3,2,1]]}'\n", 60 | "\n", 61 | "data = json.loads(raw_data)[\"data\"]\n", 62 | "data = numpy.array(data)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Score Data" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Test result: {'result': [5113.099642122813, 3713.6329271385353]}\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "request_headers = {}\n", 87 | "\n", 88 | "result = model.predict(data)\n", 89 | "print(\"Test result: \", {\"result\": result.tolist()})" 90 | ] 91 | } 92 | ], 93 | "metadata": { 94 | "kernelspec": { 95 | "display_name": "Python (storedna)", 96 | "language": "python", 97 | "name": "storedna" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": { 101 | "name": "ipython", 102 | "version": 3 103 | }, 104 | "file_extension": ".py", 105 | "mimetype": "text/x-python", 106 | "name": "python", 107 | "nbconvert_exporter": "python", 108 | 
"pygments_lexer": "ipython3", 109 | "version": "3.6.9" 110 | } 111 | }, 112 | "nbformat": 4, 113 | "nbformat_minor": 2 114 | } 115 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # Azure Subscription Variables 2 | SUBSCRIPTION_ID = '' 3 | LOCATION = '' 4 | TENANT_ID = '' 5 | BASE_NAME = '' 6 | SP_APP_ID = '' 7 | SP_APP_SECRET = '' 8 | RESOURCE_GROUP = 'mlops-RG' 9 | 10 | # Mock build/release ID for local testing 11 | BUILD_BUILDID = '001' 12 | 13 | # Azure ML Workspace Variables 14 | WORKSPACE_NAME = 'mlops-aml-ws' 15 | EXPERIMENT_NAME = 'mlopspython' 16 | 17 | # AML Compute Cluster Config 18 | AML_ENV_NAME='diabetes_regression_training_env' 19 | AML_ENV_TRAIN_CONDA_DEP_FILE="conda_dependencies.yml" 20 | AML_COMPUTE_CLUSTER_NAME = 'train-cluster' 21 | AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2' 22 | AML_CLUSTER_MAX_NODES = '4' 23 | AML_CLUSTER_MIN_NODES = '0' 24 | AML_CLUSTER_PRIORITY = 'lowpriority' 25 | # Training Config 26 | MODEL_NAME = 'diabetes_regression_model.pkl' 27 | MODEL_VERSION = '1' 28 | TRAIN_SCRIPT_PATH = 'training/train.py' 29 | 30 | 31 | # AML Pipeline Config 32 | TRAINING_PIPELINE_NAME = 'Training Pipeline' 33 | MODEL_PATH = '' 34 | EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py' 35 | REGISTER_SCRIPT_PATH = 'register/register_model.py' 36 | SOURCES_DIR_TRAIN = 'diabetes_regression' 37 | DATASET_NAME = 'diabetes_ds' 38 | DATASET_VERSION = 'latest' 39 | # Optional. Set it if you have configured non default datastore to point to your data 40 | DATASTORE_NAME = '' 41 | SCORE_SCRIPT = 'scoring/score.py' 42 | 43 | # Optional. Used by a training pipeline with R on Databricks 44 | DB_CLUSTER_ID = '' 45 | 46 | # Optional. 
Container Image name for image creation 47 | IMAGE_NAME = 'mltrained' 48 | 49 | # Run Evaluation Step in AML pipeline 50 | RUN_EVALUATION = 'true' 51 | 52 | # Set to true cancels the Azure ML pipeline run when evaluation criteria are not met. 53 | ALLOW_RUN_CANCEL = 'true' 54 | 55 | # Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yaml. 56 | AML_REBUILD_ENVIRONMENT = 'false' 57 | 58 | 59 | 60 | USE_GPU_FOR_SCORING = "false" 61 | AML_ENV_SCORE_CONDA_DEP_FILE="conda_dependencies_scoring.yml" 62 | AML_ENV_SCORECOPY_CONDA_DEP_FILE="conda_dependencies_scorecopy.yml" 63 | # AML Compute Cluster Config for parallel batch scoring 64 | AML_ENV_NAME_SCORING='diabetes_regression_scoring_env' 65 | AML_ENV_NAME_SCORE_COPY='diabetes_regression_score_copy_env' 66 | AML_COMPUTE_CLUSTER_NAME_SCORING = 'score-cluster' 67 | AML_COMPUTE_CLUSTER_CPU_SKU_SCORING = 'STANDARD_DS2_V2' 68 | AML_CLUSTER_MAX_NODES_SCORING = '4' 69 | AML_CLUSTER_MIN_NODES_SCORING = '0' 70 | AML_CLUSTER_PRIORITY_SCORING = 'lowpriority' 71 | AML_REBUILD_ENVIRONMENT_SCORING = 'true' 72 | BATCHSCORE_SCRIPT_PATH = 'scoring/parallel_batchscore.py' 73 | BATCHSCORE_COPY_SCRIPT_PATH = 'scoring/parallel_batchscore_copyoutput.py' 74 | 75 | 76 | SCORING_DATASTORE_INPUT_CONTAINER = 'input' 77 | SCORING_DATASTORE_INPUT_FILENAME = 'diabetes_scoring_input.csv' 78 | SCORING_DATASTORE_OUTPUT_CONTAINER = 'output' 79 | SCORING_DATASTORE_OUTPUT_FILENAME = 'diabetes_scoring_output.csv' 80 | SCORING_DATASET_NAME = 'diabetes_scoring_ds' 81 | SCORING_PIPELINE_NAME = 'diabetes-scoring-pipeline' -------------------------------------------------------------------------------- /diabetes_regression/training/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) Microsoft Corporation. 
All rights reserved.​ 3 | ​ 4 | Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, 5 | royalty-free right to use, copy, and modify the software code provided by us 6 | ("Software Code"). You may not sublicense the Software Code or any use of it 7 | (except to your affiliates and to vendors to perform work on your behalf) 8 | through distribution, network access, service agreement, lease, rental, or 9 | otherwise. This license does not purport to express any claim of ownership over 10 | data you may have shared with Microsoft in the creation of the Software Code. 11 | Unless applicable law gives you more rights, Microsoft reserves all other 12 | rights not expressly granted herein, whether by implication, estoppel or 13 | otherwise. ​ 14 | ​ 15 | THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 21 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 22 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 
25 | """ 26 | 27 | import os 28 | import pandas as pd 29 | from sklearn.linear_model import Ridge 30 | from sklearn.metrics import mean_squared_error 31 | from sklearn.model_selection import train_test_split 32 | 33 | 34 | # Split the dataframe into test and train data 35 | def split_data(df): 36 | X = df.drop('Y', axis=1).values 37 | y = df['Y'].values 38 | 39 | X_train, X_test, y_train, y_test = train_test_split( 40 | X, y, test_size=0.2, random_state=0) 41 | data = {"train": {"X": X_train, "y": y_train}, 42 | "test": {"X": X_test, "y": y_test}} 43 | return data 44 | 45 | 46 | # Train the model, return the model 47 | def train_model(data, ridge_args): 48 | reg_model = Ridge(**ridge_args) 49 | reg_model.fit(data["train"]["X"], data["train"]["y"]) 50 | return reg_model 51 | 52 | 53 | # Evaluate the metrics for the model 54 | def get_model_metrics(model, data): 55 | preds = model.predict(data["test"]["X"]) 56 | mse = mean_squared_error(preds, data["test"]["y"]) 57 | metrics = {"mse": mse} 58 | return metrics 59 | 60 | 61 | def main(): 62 | print("Running train.py") 63 | 64 | # Define training parameters 65 | ridge_args = {"alpha": 0.5} 66 | 67 | # Load the training data as dataframe 68 | data_dir = "data" 69 | data_file = os.path.join(data_dir, 'diabetes.csv') 70 | train_df = pd.read_csv(data_file) 71 | 72 | data = split_data(train_df) 73 | 74 | # Train the model 75 | model = train_model(data, ridge_args) 76 | 77 | # Log the metrics for the model 78 | metrics = get_model_metrics(model, data) 79 | for (k, v) in metrics.items(): 80 | print(f"{k}: {v}") 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /.pipelines/diabetes_regression-batchscoring-ci.yml: -------------------------------------------------------------------------------- 1 | # Continuous Integration (CI) pipeline that orchestrates the batch scoring of the diabetes_regression model. 
2 | 3 | # Runtime parameters to select artifacts 4 | parameters: 5 | - name : artifactBuildId 6 | displayName: Model Train CI Build ID. Default is 'latest'. 7 | type: string 8 | default: latest 9 | 10 | pr: none 11 | 12 | # Trigger this pipeline on model-train pipeline completion 13 | resources: 14 | containers: 15 | - container: mlops 16 | image: mcr.microsoft.com/mlops/python:latest 17 | pipelines: 18 | - pipeline: model-train-ci 19 | source: Model-Train-Register-CI # Name of the triggering pipeline 20 | trigger: 21 | branches: 22 | include: 23 | - master 24 | 25 | trigger: 26 | branches: 27 | include: 28 | - master 29 | paths: 30 | include: 31 | - diabetes_regression/scoring/parallel_batchscore.py 32 | - ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py 33 | - ml_service/pipelines/run_parallel_batchscore_pipeline.py 34 | 35 | variables: 36 | - template: diabetes_regression-variables-template.yml 37 | - group: devopsforai-aml-vg 38 | 39 | pool: 40 | vmImage: ubuntu-latest 41 | 42 | stages: 43 | - stage: 'Batch_Scoring_Pipeline_CI' 44 | displayName: 'Batch Scoring Pipeline CI' 45 | jobs: 46 | - job: "Build_Batch_Scoring_Pipeline" 47 | displayName: "Build Batch Scoring Pipeline" 48 | container: mlops 49 | timeoutInMinutes: 0 50 | steps: 51 | - template: code-quality-template.yml 52 | - template: diabetes_regression-get-model-id-artifact-template.yml 53 | parameters: 54 | projectId: '$(resources.pipeline.model-train-ci.projectID)' 55 | pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)' 56 | artifactBuildId: ${{ parameters.artifactBuildId }} 57 | - task: AzureCLI@1 58 | displayName: "Publish Batch Scoring Pipeline" 59 | name: publish_batchscore 60 | inputs: 61 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 62 | scriptLocation: inlineScript 63 | workingDirectory: $(Build.SourcesDirectory) 64 | inlineScript: | 65 | set -e # fail on error 66 | export SUBSCRIPTION_ID=$(az account show --query id -o tsv) 67 | # Invoke the Python 
building and publishing a training pipeline 68 | python -m ml_service.pipelines.diabetes_regression_build_parallel_batchscore_pipeline 69 | env: 70 | SCORING_DATASTORE_ACCESS_KEY: $(SCORING_DATASTORE_ACCESS_KEY) 71 | 72 | - job: "Run_Batch_Score_Pipeline" 73 | displayName: "Run Batch Scoring Pipeline" 74 | dependsOn: ["Build_Batch_Scoring_Pipeline"] 75 | timeoutInMinutes: 240 76 | pool: server 77 | variables: 78 | pipeline_id: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['publish_batchscore.pipeline_id']] 79 | model_name: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['get_model.MODEL_NAME']] 80 | model_version: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['get_model.MODEL_VERSION']] 81 | steps: 82 | - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0 83 | displayName: 'Invoke Batch Scoring pipeline' 84 | inputs: 85 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 86 | PipelineId: '$(pipeline_id)' 87 | ExperimentName: '$(EXPERIMENT_NAME)' 88 | PipelineParameters: '"ParameterAssignments": {"model_name": "$(model_name)", "model_version": "$(model_version)"}' 89 | -------------------------------------------------------------------------------- /diabetes_regression/scoring/parallel_batchscore_copyoutput.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) Microsoft Corporation. All rights reserved.​ 3 | ​ 4 | Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, 5 | royalty-free right to use, copy, and modify the software code provided by us 6 | ("Software Code"). You may not sublicense the Software Code or any use of it 7 | (except to your affiliates and to vendors to perform work on your behalf) 8 | through distribution, network access, service agreement, lease, rental, or 9 | otherwise. 
This license does not purport to express any claim of ownership over 10 | data you may have shared with Microsoft in the creation of the Software Code. 11 | Unless applicable law gives you more rights, Microsoft reserves all other 12 | rights not expressly granted herein, whether by implication, estoppel or 13 | otherwise. ​ 14 | ​ 15 | THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 21 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 22 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 
25 | """ 26 | 27 | from azure.storage.blob import ContainerClient 28 | from datetime import datetime, date, timezone 29 | import argparse 30 | import os 31 | 32 | 33 | def parse_args(): 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument("--output_path", type=str, default=None) 36 | parser.add_argument("--scoring_datastore", type=str, default=None) 37 | parser.add_argument("--score_container", type=str, default=None) 38 | parser.add_argument("--scoring_datastore_key", type=str, default=None) 39 | parser.add_argument("--scoring_output_filename", type=str, default=None) 40 | 41 | return parser.parse_args() 42 | 43 | 44 | def copy_output(args): 45 | print("Output : {}".format(args.output_path)) 46 | 47 | accounturl = "https://{}.blob.core.windows.net".format( 48 | args.scoring_datastore 49 | ) # NOQA E501 50 | 51 | containerclient = ContainerClient( 52 | accounturl, args.score_container, args.scoring_datastore_key 53 | ) 54 | 55 | destfolder = date.today().isoformat() 56 | filetime = ( 57 | datetime.now(timezone.utc) 58 | .time() 59 | .isoformat("milliseconds") 60 | .replace(":", "_") 61 | .replace(".", "_") 62 | ) # noqa E501 63 | destfilenameparts = args.scoring_output_filename.split(".") 64 | destblobname = "{}/{}_{}.{}".format( 65 | destfolder, destfilenameparts[0], filetime, destfilenameparts[1] 66 | ) 67 | 68 | destblobclient = containerclient.get_blob_client(destblobname) 69 | with open( 70 | os.path.join(args.output_path, "parallel_run_step.txt"), "rb" 71 | ) as scorefile: # noqa E501 72 | destblobclient.upload_blob(scorefile, blob_type="BlockBlob") 73 | 74 | 75 | if __name__ == "__main__": 76 | args = parse_args() 77 | if ( 78 | args.scoring_datastore is None 79 | or args.scoring_datastore.strip() == "" 80 | or args.score_container is None 81 | or args.score_container.strip() == "" 82 | or args.scoring_datastore_key is None 83 | or args.scoring_datastore_key.strip() == "" 84 | or args.scoring_output_filename is None 85 | or 
args.scoring_output_filename.strip() == "" 86 | or args.output_path is None 87 | or args.output_path.strip() == "" 88 | ): 89 | print("Missing parameters") 90 | else: 91 | copy_output(args) 92 | -------------------------------------------------------------------------------- /diabetes_regression/scoring/score.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) Microsoft Corporation. All rights reserved.​ 3 | ​ 4 | Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, 5 | royalty-free right to use, copy, and modify the software code provided by us 6 | ("Software Code"). You may not sublicense the Software Code or any use of it 7 | (except to your affiliates and to vendors to perform work on your behalf) 8 | through distribution, network access, service agreement, lease, rental, or 9 | otherwise. This license does not purport to express any claim of ownership over 10 | data you may have shared with Microsoft in the creation of the Software Code. 11 | Unless applicable law gives you more rights, Microsoft reserves all other 12 | rights not expressly granted herein, whether by implication, estoppel or 13 | otherwise. ​ 14 | ​ 15 | THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 21 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 22 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 
25 | """ 26 | import numpy 27 | import joblib 28 | import os 29 | from azureml.core.model import Model 30 | from inference_schema.schema_decorators \ 31 | import input_schema, output_schema 32 | from inference_schema.parameter_types.numpy_parameter_type \ 33 | import NumpyParameterType 34 | 35 | 36 | def init(): 37 | # load the model from file into a global object 38 | global model 39 | 40 | # we assume that we have just one model 41 | # AZUREML_MODEL_DIR is an environment variable created during deployment. 42 | # It is the path to the model folder 43 | # (./azureml-models/$MODEL_NAME/$VERSION) 44 | model_path = Model.get_model_path( 45 | os.getenv("AZUREML_MODEL_DIR").split('/')[-2]) 46 | 47 | model = joblib.load(model_path) 48 | 49 | 50 | input_sample = numpy.array([ 51 | [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], 52 | [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]]) 53 | output_sample = numpy.array([ 54 | 5021.509689995557, 55 | 3693.645386402646]) 56 | 57 | 58 | # Inference_schema generates a schema for your web service 59 | # It then creates an OpenAPI (Swagger) specification for the web service 60 | # at http:///swagger.json 61 | @input_schema('data', NumpyParameterType(input_sample)) 62 | @output_schema(NumpyParameterType(output_sample)) 63 | def run(data, request_headers): 64 | result = model.predict(data) 65 | 66 | # Demonstrate how we can log custom data into the Application Insights 67 | # traces collection. 68 | # The 'X-Ms-Request-id' value is generated internally and can be used to 69 | # correlate a log entry with the Application Insights requests collection. 70 | # The HTTP 'traceparent' header may be set by the caller to implement 71 | # distributed tracing (per the W3C Trace Context proposed specification) 72 | # and can be used to correlate the request to external systems. 
73 | print(('{{"RequestId":"{0}", ' 74 | '"TraceParent":"{1}", ' 75 | '"NumberOfPredictions":{2}}}' 76 | ).format( 77 | request_headers.get("X-Ms-Request-Id", ""), 78 | request_headers.get("Traceparent", ""), 79 | len(result) 80 | )) 81 | 82 | return {"result": result.tolist()} 83 | 84 | 85 | if __name__ == "__main__": 86 | # Test scoring 87 | init() 88 | test_row = '{"data":[[1,2,3,4,5,6,7,8,9,10],[10,9,8,7,6,5,4,3,2,1]]}' 89 | prediction = run(test_row, {}) 90 | print("Test result: ", prediction) 91 | -------------------------------------------------------------------------------- /.pipelines/diabetes_regression-ci.yml: -------------------------------------------------------------------------------- 1 | # Continuous Integration (CI) pipeline that orchestrates the training, evaluation, and registration of the diabetes_regression model. 2 | 3 | resources: 4 | containers: 5 | - container: mlops 6 | image: mcr.microsoft.com/mlops/python:latest 7 | 8 | pr: none 9 | trigger: 10 | branches: 11 | include: 12 | - master 13 | paths: 14 | include: 15 | - diabetes_regression/ 16 | - ml_service/pipelines/diabetes_regression_build_train_pipeline.py 17 | - ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py 18 | - ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py 19 | 20 | variables: 21 | - template: diabetes_regression-variables-template.yml 22 | - group: devopsforai-aml-vg 23 | 24 | pool: 25 | vmImage: ubuntu-latest 26 | 27 | stages: 28 | - stage: 'Model_CI' 29 | displayName: 'Model CI' 30 | jobs: 31 | - job: "Model_CI_Pipeline" 32 | displayName: "Model CI Pipeline" 33 | container: mlops 34 | timeoutInMinutes: 0 35 | steps: 36 | - template: code-quality-template.yml 37 | - task: AzureCLI@1 38 | inputs: 39 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 40 | scriptLocation: inlineScript 41 | workingDirectory: $(Build.SourcesDirectory) 42 | inlineScript: | 43 | set -e # fail on error 44 | export SUBSCRIPTION_ID=$(az 
account show --query id -o tsv) 45 | # Invoke the Python building and publishing a training pipeline 46 | python -m ml_service.pipelines.diabetes_regression_build_train_pipeline 47 | displayName: 'Publish Azure Machine Learning Pipeline' 48 | 49 | - stage: 'Trigger_AML_Pipeline' 50 | displayName: 'Train and evaluate model' 51 | condition: succeeded() 52 | variables: 53 | BUILD_URI: '$(SYSTEM.COLLECTIONURI)$(SYSTEM.TEAMPROJECT)/_build/results?buildId=$(BUILD.BUILDID)' 54 | jobs: 55 | - job: "Get_Pipeline_ID" 56 | condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true')) 57 | displayName: "Get Pipeline ID for execution" 58 | container: mlops 59 | timeoutInMinutes: 0 60 | steps: 61 | - task: AzureCLI@1 62 | inputs: 63 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 64 | scriptLocation: inlineScript 65 | workingDirectory: $(Build.SourcesDirectory) 66 | inlineScript: | 67 | set -e # fail on error 68 | export SUBSCRIPTION_ID=$(az account show --query id -o tsv) 69 | python -m ml_service.pipelines.run_train_pipeline --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution 70 | # Set AMLPIPELINEID variable for next AML Pipeline task in next job 71 | AMLPIPELINEID="$(cat pipeline_id.txt)" 72 | echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID" 73 | name: 'getpipelineid' 74 | displayName: 'Get Pipeline ID' 75 | - job: "Run_ML_Pipeline" 76 | dependsOn: "Get_Pipeline_ID" 77 | displayName: "Trigger ML Training Pipeline" 78 | timeoutInMinutes: 0 79 | pool: server 80 | variables: 81 | AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ] 82 | steps: 83 | - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0 84 | displayName: 'Invoke ML pipeline' 85 | inputs: 86 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 87 | PipelineId: '$(AMLPIPELINE_ID)' 88 | ExperimentName: '$(EXPERIMENT_NAME)' 89 | PipelineParameters: 
'"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}, "tags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}, "StepTags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}' 90 | - job: "Training_Run_Report" 91 | dependsOn: "Run_ML_Pipeline" 92 | condition: always() 93 | displayName: "Publish artifact if new model was registered" 94 | container: mlops 95 | timeoutInMinutes: 0 96 | steps: 97 | - template: diabetes_regression-publish-model-artifact-template.yml 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | page_type: sample 3 | languages: 4 | - python 5 | products: 6 | - azure 7 | - azure-machine-learning-service 8 | - azure-devops 9 | description: "Code which demonstrates how to set up and operationalize an MLOps flow leveraging Azure Machine Learning and Azure DevOps." 10 | --- 11 | 12 | # MLOps with Azure ML 13 | 14 | CI: [![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/Model-Train-Register-CI?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=160&branchName=master) 15 | 16 | CD: [![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/microsoft.MLOpsPython-CD?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=161&branchName=master) 17 | 18 | MLOps will help you to understand how to build a Continuous Integration and Continuous Delivery pipeline for an ML/AI project. We will be using the Azure DevOps Project for build and release/deployment pipelines along with Azure ML services for model retraining pipeline, model management and operationalization. 19 | 20 | ![ML lifecycle](/docs/images/ml-lifecycle.png) 21 | 22 | This template contains code and pipeline definitions for a machine learning project that demonstrates how to automate an end to end ML/AI workflow. 
23 | 24 | ## Architecture and Features 25 | 26 | Architecture Reference: [Machine learning operationalization (MLOps) for Python models using Azure Machine Learning](https://docs.microsoft.com/en-us/azure/architecture/reference-architectures/ai/mlops-python) 27 | 28 | This reference architecture shows how to implement continuous integration (CI), continuous delivery (CD), and retraining pipeline for an AI application using Azure DevOps and [Azure Machine Learning](/azure/machine-learning/service/overview-what-is-azure-ml). The solution is built on the scikit-learn diabetes dataset but can be easily adapted for any AI scenario and other popular build systems such as Jenkins and Travis. 29 | 30 | The build pipelines include DevOps tasks for data sanity tests, unit tests, model training on different compute targets, model version management, model evaluation/model selection, model deployment as realtime web service, staged deployment to QA/prod and integration testing. 31 | 32 | ## Prerequisite 33 | 34 | - Active Azure subscription 35 | - At least contributor access to Azure subscription 36 | 37 | ## Getting Started 38 | 39 | To deploy this solution in your subscription, follow the manual instructions in the [getting started](docs/getting_started.md) doc. Then optionally follow the guide for [integrating your own code](docs/custom_model.md) with this repository template. 
40 | 41 | ### Repo Details 42 | 43 | You can find the details of the code and scripts in the repository [here](/docs/code_description.md) 44 | 45 | ### References 46 | 47 | - [Azure Machine Learning (Azure ML) Service Workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/overview-what-is-azure-ml) 48 | - [Azure ML CLI](https://docs.microsoft.com/en-us/azure/machine-learning/service/reference-azure-machine-learning-cli) 49 | - [Azure ML Samples](https://docs.microsoft.com/en-us/azure/machine-learning/service/samples-notebooks) 50 | - [Azure ML Python SDK Quickstart](https://docs.microsoft.com/en-us/azure/machine-learning/service/quickstart-create-workspace-with-python) 51 | - [Azure DevOps](https://docs.microsoft.com/en-us/azure/devops/?view=vsts) 52 | 53 | ## Contributing 54 | 55 | This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit 56 | 57 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. 58 | 59 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 60 | -------------------------------------------------------------------------------- /data/data_test.py: -------------------------------------------------------------------------------- 1 | # test integrity of the input data 2 | """ 3 | Copyright (C) Microsoft Corporation. 
All rights reserved.​ 4 | ​ 5 | Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, 6 | royalty-free right to use, copy, and modify the software code provided by us 7 | ("Software Code"). You may not sublicense the Software Code or any use of it 8 | (except to your affiliates and to vendors to perform work on your behalf) 9 | through distribution, network access, service agreement, lease, rental, or 10 | otherwise. This license does not purport to express any claim of ownership over 11 | data you may have shared with Microsoft in the creation of the Software Code. 12 | Unless applicable law gives you more rights, Microsoft reserves all other 13 | rights not expressly granted herein, whether by implication, estoppel or 14 | otherwise. ​ 15 | ​ 16 | THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 23 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE 25 | POSSIBILITY OF SUCH DAMAGE. 
26 | """ 27 | import os 28 | import numpy as np 29 | import pandas as pd 30 | 31 | 32 | # get absolute path of csv files from data folder 33 | def get_absPath(filename): 34 | """Returns the path of the notebooks folder""" 35 | path = os.path.abspath( 36 | os.path.join( 37 | os.path.dirname( 38 | __file__), os.path.pardir, "data", filename 39 | ) 40 | ) 41 | return path 42 | 43 | 44 | # number of features 45 | expected_columns = 10 46 | 47 | # distribution of features in the training set 48 | historical_mean = np.array( 49 | [ 50 | -3.63962254e-16, 51 | 1.26972339e-16, 52 | -8.01646331e-16, 53 | 1.28856202e-16, 54 | -8.99230414e-17, 55 | 1.29609747e-16, 56 | -4.56397112e-16, 57 | 3.87573332e-16, 58 | -3.84559152e-16, 59 | -3.39848813e-16, 60 | 1.52133484e02, 61 | ] 62 | ) 63 | historical_std = np.array( 64 | [ 65 | 4.75651494e-02, 66 | 4.75651494e-02, 67 | 4.75651494e-02, 68 | 4.75651494e-02, 69 | 4.75651494e-02, 70 | 4.75651494e-02, 71 | 4.75651494e-02, 72 | 4.75651494e-02, 73 | 4.75651494e-02, 74 | 4.75651494e-02, 75 | 7.70057459e01, 76 | ] 77 | ) 78 | 79 | # maximal relative change in feature mean or standrd deviation 80 | # that we can tolerate 81 | shift_tolerance = 3 82 | 83 | 84 | def test_check_schema(): 85 | datafile = get_absPath("diabetes.csv") 86 | # check that file exists 87 | assert os.path.exists(datafile) 88 | dataset = pd.read_csv(datafile) 89 | header = dataset[dataset.columns[:-1]] 90 | actual_columns = header.shape[1] 91 | # check header has expected number of columns 92 | assert actual_columns == expected_columns 93 | 94 | 95 | def test_check_bad_schema(): 96 | datafile = get_absPath("diabetes_bad_schema.csv") 97 | # check that file exists 98 | assert os.path.exists(datafile) 99 | dataset = pd.read_csv(datafile) 100 | header = dataset[dataset.columns[:-1]] 101 | actual_columns = header.shape[1] 102 | # check header has expected number of columns 103 | assert actual_columns != expected_columns 104 | 105 | 106 | def test_check_missing_values(): 
107 | datafile = get_absPath("diabetes_missing_values.csv") 108 | # check that file exists 109 | assert os.path.exists(datafile) 110 | dataset = pd.read_csv(datafile) 111 | n_nan = np.sum(np.isnan(dataset.values)) 112 | assert n_nan > 0 113 | 114 | 115 | def test_check_distribution(): 116 | datafile = get_absPath("diabetes_bad_dist.csv") 117 | # check that file exists 118 | assert os.path.exists(datafile) 119 | dataset = pd.read_csv(datafile) 120 | mean = np.mean(dataset.values, axis=0) 121 | std = np.mean(dataset.values, axis=0) 122 | assert ( 123 | np.sum(abs(mean - historical_mean) 124 | > shift_tolerance * abs(historical_mean)) 125 | or np.sum(abs(std - historical_std) 126 | > shift_tolerance * abs(historical_std)) > 0 127 | ) 128 | -------------------------------------------------------------------------------- /ml_service/pipelines/run_parallel_batchscore_pipeline.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) Microsoft Corporation. All rights reserved.​ 3 | ​ 4 | Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, 5 | royalty-free right to use, copy, and modify the software code provided by us 6 | ("Software Code"). You may not sublicense the Software Code or any use of it 7 | (except to your affiliates and to vendors to perform work on your behalf) 8 | through distribution, network access, service agreement, lease, rental, or 9 | otherwise. This license does not purport to express any claim of ownership over 10 | data you may have shared with Microsoft in the creation of the Software Code. 11 | Unless applicable law gives you more rights, Microsoft reserves all other 12 | rights not expressly granted herein, whether by implication, estoppel or 13 | otherwise. 
from azure.storage.blob import ContainerClient
from ml_service.util.env_variables import Env
from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PublishedPipeline
import argparse


def parse_args():
    """Parse command-line arguments; --pipeline_id selects a published pipeline."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline_id", type=str, default=None)
    return parser.parse_args()


def get_pipeline(pipeline_id, ws: Workspace, env: Env):
    """Fetch the scoring pipeline to run.

    Returns the published pipeline with the given id, or — when no id is
    supplied — the most recently published pipeline whose name matches
    env.scoring_pipeline_name.

    Raises:
        Exception: when no published pipeline matches the configured name.
    """
    if pipeline_id is not None:
        scoringpipeline = PublishedPipeline.get(ws, pipeline_id)
    else:
        pipelines = PublishedPipeline.list(ws)
        scoringpipelinelist = [
            pl for pl in pipelines if pl.name == env.scoring_pipeline_name
        ]  # noqa E501

        # BUG FIX: the original tested `scoringpipelinelist.count == 0`.
        # list.count is a bound method object, so that comparison was
        # always False and an empty result crashed with IndexError below
        # instead of raising the intended, descriptive exception.
        if not scoringpipelinelist:
            raise Exception(
                "No pipeline found matching name:{}".format(env.scoring_pipeline_name)  # NOQA: E501
            )
        # First entry is the latest published pipeline with that name.
        scoringpipeline = scoringpipelinelist[0]

    return scoringpipeline


def copy_output(step_id: str, env: Env):
    """Copy the parallel-run output blob to a date/time-stamped location."""
    accounturl = "https://{}.blob.core.windows.net".format(
        env.scoring_datastore_storage_name
    )

    # NOTE(review): the "<...>_out" folder segment is built from the
    # storage account name; confirm this matches the pipeline step's
    # actual output path convention.
    srcblobname = "azureml/{}/{}_out/parallel_run_step.txt".format(
        step_id, env.scoring_datastore_storage_name
    )

    srcbloburl = "{}/{}/{}".format(
        accounturl, env.scoring_datastore_output_container, srcblobname
    )

    containerclient = ContainerClient(
        accounturl,
        env.scoring_datastore_output_container,
        env.scoring_datastore_access_key,
    )
    srcblobproperties = containerclient.get_blob_client(
        srcblobname
    ).get_blob_properties()  # noqa E501

    # Destination blob: <last-modified-date>/<name>_<time>.<ext> so each
    # run's output lands in its own dated folder without collisions.
    destfolder = srcblobproperties.last_modified.date().isoformat()
    filetime = (
        srcblobproperties.last_modified.time()
        .isoformat("milliseconds")
        .replace(":", "_")
        .replace(".", "_")
    )  # noqa E501
    destfilenameparts = env.scoring_datastore_output_filename.split(".")
    destblobname = "{}/{}_{}.{}".format(
        destfolder, destfilenameparts[0], filetime, destfilenameparts[1]
    )

    destblobclient = containerclient.get_blob_client(destblobname)
    destblobclient.start_copy_from_url(srcbloburl)


def run_batchscore_pipeline():
    """Submit the published batch-scoring pipeline and copy its output."""
    try:
        env = Env()

        args = parse_args()

        aml_workspace = Workspace.get(
            name=env.workspace_name,
            subscription_id=env.subscription_id,
            resource_group=env.resource_group,
        )

        scoringpipeline = get_pipeline(args.pipeline_id, aml_workspace, env)

        experiment = Experiment(workspace=aml_workspace, name=env.experiment_name)  # NOQA: E501

        run = experiment.submit(
            scoringpipeline,
            pipeline_parameters={
                "model_name": env.model_name,
                "model_version": env.model_version,
                "model_tag_name": " ",
                "model_tag_value": " ",
            },
        )

        run.wait_for_completion(show_output=True)

        if run.get_status() == "Finished":
            copy_output(list(run.get_steps())[0].id, env)

    except Exception as ex:
        print("Error: {}".format(ex))


if __name__ == "__main__":
    run_batchscore_pipeline()
name: AML_CLUSTER_PRIORITY 50 | value: lowpriority 51 | 52 | # The name for the (docker/webapp) scoring image 53 | - name: IMAGE_NAME 54 | value: "diabetestrained" 55 | 56 | # Optional. Used by a training pipeline with R on Databricks 57 | - name: DB_CLUSTER_ID 58 | value: "" 59 | 60 | # These are the default values set in ml_service\util\env_variables.py. Uncomment and override if desired. 61 | # Set to false to disable the evaluation step in the ML pipeline and register the newly trained model unconditionally. 62 | # - name: RUN_EVALUATION 63 | # value: "true" 64 | # Set to false to register the model regardless of the outcome of the evaluation step in the ML pipeline. 65 | # - name: ALLOW_RUN_CANCEL 66 | # value: "true" 67 | 68 | # Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yaml. 69 | # - name: AML_REBUILD_ENVIRONMENT 70 | # value: "false" 71 | 72 | # Variables below are used for controlling various aspects of batch scoring 73 | - name: USE_GPU_FOR_SCORING 74 | value: False 75 | # Conda dependencies for the batch scoring step 76 | - name: AML_ENV_SCORE_CONDA_DEP_FILE 77 | value: "conda_dependencies_scoring.yml" 78 | # Conda dependencies for the score copying step 79 | - name: AML_ENV_SCORECOPY_CONDA_DEP_FILE 80 | value: "conda_dependencies_scorecopy.yml" 81 | # AML Compute Cluster Config for parallel batch scoring 82 | - name: AML_ENV_NAME_SCORING 83 | value: diabetes_regression_scoring_env 84 | - name: AML_ENV_NAME_SCORE_COPY 85 | value: diabetes_regression_score_copy_env 86 | - name: AML_COMPUTE_CLUSTER_CPU_SKU_SCORING 87 | value: STANDARD_DS2_V2 88 | - name: AML_COMPUTE_CLUSTER_NAME_SCORING 89 | value: score-cluster 90 | - name: AML_CLUSTER_MIN_NODES_SCORING 91 | value: 0 92 | - name: AML_CLUSTER_MAX_NODES_SCORING 93 | value: 4 94 | - name: AML_CLUSTER_PRIORITY_SCORING 95 | value: lowpriority 96 | # The path to the batch scoring script relative to 
SOURCES_DIR_TRAIN 97 | - name: BATCHSCORE_SCRIPT_PATH 98 | value: scoring/parallel_batchscore.py 99 | - name: BATCHSCORE_COPY_SCRIPT_PATH 100 | value: scoring/parallel_batchscore_copyoutput.py 101 | # Flag to allow rebuilding the AML Environment after it was built for the first time. 102 | # This enables dependency updates from the conda dependencies yaml for scoring activities. 103 | - name: AML_REBUILD_ENVIRONMENT_SCORING 104 | value: "true" 105 | 106 | # Datastore config for scoring 107 | # The storage account name and key are supplied as variables in a variable group 108 | # in the Azure Pipelines library for this project. Please refer to repo docs for 109 | # more details 110 | 111 | # Blob container where the input data for scoring can be found 112 | - name: SCORING_DATASTORE_INPUT_CONTAINER 113 | value: "input" 114 | # Blobname for the input data - include any applicable path in the string 115 | - name: SCORING_DATASTORE_INPUT_FILENAME 116 | value: "diabetes_scoring_input.csv" 117 | # Blob container where the output data for scoring can be found 118 | - name: SCORING_DATASTORE_OUTPUT_CONTAINER 119 | value: "output" 120 | # Blobname for the output data - include any applicable path in the string 121 | - name: SCORING_DATASTORE_OUTPUT_FILENAME 122 | value: "diabetes_scoring_output.csv" 123 | # Dataset name for input data for scoring 124 | - name: SCORING_DATASET_NAME 125 | value: "diabetes_scoring_ds" 126 | # Scoring pipeline name 127 | - name: SCORING_PIPELINE_NAME 128 | value: "diabetes-scoring-pipeline" 129 | -------------------------------------------------------------------------------- /environment_setup/arm-templates/cloud-environment.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "baseName": { 6 | "type": "string", 7 | "maxLength": 10, 8 | "minLength": 3, 
9 | "metadata": { 10 | "description": "The base name to use as prefix to create all the resources." 11 | } 12 | }, 13 | "location": { 14 | "type": "string", 15 | "defaultValue": "eastus", 16 | "allowedValues": [ 17 | "eastus", 18 | "eastus2", 19 | "southcentralus", 20 | "southeastasia", 21 | "westcentralus", 22 | "westeurope", 23 | "westus2", 24 | "centralus" 25 | ], 26 | "metadata": { 27 | "description": "Specifies the location for all resources." 28 | } 29 | }, 30 | "workspace": { 31 | "type": "string" 32 | }, 33 | "storageAccount": { 34 | "type": "string", 35 | "defaultValue": "[concat(toLower(parameters('baseName')), 'amlsa')]" 36 | }, 37 | "keyvault": { 38 | "type": "string", 39 | "defaultValue": "[concat(parameters('baseName'),'-AML-KV')]" 40 | }, 41 | "appInsights": { 42 | "type": "string", 43 | "defaultValue": "[concat(parameters('baseName'),'-AML-AI')]" 44 | }, 45 | "acr": { 46 | "type": "string", 47 | "defaultValue": "[concat(toLower(parameters('baseName')),'amlcr')]" 48 | }, 49 | "sku": { 50 | "type": "string", 51 | "defaultValue": "basic", 52 | "allowedValues": [ 53 | "basic", 54 | "enterprise" 55 | ], 56 | "metadata": { 57 | "description": "Specifies the sku, also referred as 'edition' of the Azure Machine Learning workspace." 
58 | } 59 | } 60 | }, 61 | "variables": { 62 | "amlWorkspaceName": "[parameters('workspace')]", 63 | "storageAccountName": "[parameters('storageAccount')]", 64 | "storageAccountType": "Standard_LRS", 65 | "keyVaultName": "[parameters('keyvault')]", 66 | "tenantId": "[subscription().tenantId]", 67 | "applicationInsightsName": "[parameters('appInsights')]", 68 | "containerRegistryName": "[parameters('acr')]" 69 | }, 70 | "resources": [ 71 | { 72 | "type": "Microsoft.Storage/storageAccounts", 73 | "apiVersion": "2018-07-01", 74 | "name": "[variables('storageAccountName')]", 75 | "location": "[parameters('location')]", 76 | "sku": { 77 | "name": "[variables('storageAccountType')]" 78 | }, 79 | "kind": "StorageV2", 80 | "properties": { 81 | "encryption": { 82 | "services": { 83 | "blob": { 84 | "enabled": true 85 | }, 86 | "file": { 87 | "enabled": true 88 | } 89 | }, 90 | "keySource": "Microsoft.Storage" 91 | }, 92 | "supportsHttpsTrafficOnly": true 93 | } 94 | }, 95 | { 96 | "type": "Microsoft.KeyVault/vaults", 97 | "apiVersion": "2018-02-14", 98 | "name": "[variables('keyVaultName')]", 99 | "location": "[parameters('location')]", 100 | "properties": { 101 | "tenantId": "[variables('tenantId')]", 102 | "sku": { 103 | "name": "standard", 104 | "family": "A" 105 | }, 106 | "accessPolicies": [ 107 | ] 108 | } 109 | }, 110 | { 111 | "type": "Microsoft.Insights/components", 112 | "apiVersion": "2015-05-01", 113 | "name": "[variables('applicationInsightsName')]", 114 | "location": "[if(or(equals(parameters('location'),'eastus2'),equals(parameters('location'),'westcentralus')),'southcentralus',parameters('location'))]", 115 | "kind": "web", 116 | "properties": { 117 | "Application_Type": "web" 118 | } 119 | }, 120 | { 121 | "type": "Microsoft.ContainerRegistry/registries", 122 | "apiVersion": "2017-10-01", 123 | "name": "[variables('containerRegistryName')]", 124 | "location": "[parameters('location')]", 125 | "sku": { 126 | "name": "Standard" 127 | }, 128 | "properties": { 
129 | "adminUserEnabled": true 130 | } 131 | }, 132 | { 133 | "type": "Microsoft.MachineLearningServices/workspaces", 134 | "apiVersion": "2018-11-19", 135 | "name": "[variables('amlWorkspaceName')]", 136 | "location": "[parameters('location')]", 137 | "dependsOn": [ 138 | "[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]", 139 | "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]", 140 | "[resourceId('Microsoft.Insights/components', variables('applicationInsightsName'))]", 141 | "[resourceId('Microsoft.ContainerRegistry/registries', variables('containerRegistryName'))]" 142 | ], 143 | "identity": { 144 | "type": "systemAssigned" 145 | }, 146 | "sku": { 147 | "tier": "[parameters('sku')]", 148 | "name": "[parameters('sku')]" 149 | }, 150 | "properties": { 151 | "friendlyName": "[variables('amlWorkspaceName')]", 152 | "keyVault": "[resourceId('Microsoft.KeyVault/vaults',variables('keyVaultName'))]", 153 | "applicationInsights": "[resourceId('Microsoft.Insights/components',variables('applicationInsightsName'))]", 154 | "containerRegistry": "[resourceId('Microsoft.ContainerRegistry/registries',variables('containerRegistryName'))]", 155 | "storageAccount": "[resourceId('Microsoft.Storage/storageAccounts/',variables('storageAccountName'))]" 156 | } 157 | } 158 | ] 159 | } -------------------------------------------------------------------------------- /.pipelines/abtest.yml: -------------------------------------------------------------------------------- 1 | # Pipeline for the canary deployment workflow. 
2 | 3 | resources: 4 | containers: 5 | - container: mlops 6 | image: mcr.microsoft.com/mlops/python:latest 7 | 8 | pr: none 9 | trigger: 10 | branches: 11 | include: 12 | - master 13 | paths: 14 | exclude: 15 | - docs/ 16 | - environment_setup/ 17 | - ml_service/util/create_scoring_image.* 18 | - ml_service/util/smoke_test_scoring_service.py 19 | 20 | variables: 21 | - template: diabetes_regression-variables-template.yml 22 | - group: 'devopsforai-aml-vg' 23 | - name: 'helmVersion' 24 | value: 'v3.1.1' 25 | - name: 'helmDownloadURL' 26 | value: 'https://get.helm.sh/helm-$HELM_VERSION-linux-amd64.tar.gz' 27 | - name: 'blueReleaseName' 28 | value: 'model-blue' 29 | - name: 'greenReleaseName' 30 | value: 'model-green' 31 | - name: 'SCORE_SCRIPT' 32 | value: 'scoring/scoreA.py' 33 | 34 | stages: 35 | - stage: 'Building' 36 | jobs: 37 | - job: "Build_Scoring_image" 38 | timeoutInMinutes: 0 39 | pool: 40 | vmImage: 'ubuntu-latest' 41 | container: mlops 42 | steps: 43 | - task: AzureCLI@1 44 | inputs: 45 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 46 | scriptLocation: inlineScript 47 | inlineScript: | 48 | set -e 49 | export SUBSCRIPTION_ID=$(az account show --query id -o tsv) 50 | python -m ml_service.util.create_scoring_image --output_image_location_file image_location.txt 51 | displayName: 'Create Scoring Image' 52 | name: 'buildscoringimage' 53 | 54 | - publish: image_location.txt 55 | artifact: image_location 56 | 57 | - publish: $(System.DefaultWorkingDirectory)/charts 58 | artifact: allcharts 59 | 60 | - stage: 'Blue_Staging' 61 | jobs: 62 | - deployment: "Deploy_to_Staging" 63 | timeoutInMinutes: 0 64 | environment: abtestenv 65 | strategy: 66 | runOnce: 67 | deploy: 68 | steps: 69 | - script: | 70 | IMAGE_LOCATION="$(cat $(Pipeline.Workspace)/image_location/image_location.txt)" 71 | echo "##vso[task.setvariable variable=IMAGE_LOCATION]$IMAGE_LOCATION" 72 | displayName: 'Get Image Location' 73 | - template: helm-upgrade-template.yml 74 | parameters: 75 | 
chartPath: '$(Pipeline.Workspace)/allcharts/abtest-model' 76 | releaseName: $(blueReleaseName) 77 | overrideValues: 'deployment.name=$(blueReleaseName),deployment.bluegreen=blue,deployment.image.name=$(IMAGE_LOCATION)' 78 | 79 | - stage: 'Blue_50' 80 | jobs: 81 | - job: 'Blue_Rollout_50' 82 | displayName: 50 50 rollout to blue environment 83 | timeoutInMinutes: 0 84 | steps: 85 | - template: helm-upgrade-template.yml 86 | parameters: 87 | chartPath: '$(System.DefaultWorkingDirectory)/charts/abtest-istio' 88 | releaseName: 'abtest-istio' 89 | overrideValues: 'weight.blue=50,weight.green=50' 90 | 91 | - stage: 'Blue_100' 92 | jobs: 93 | - deployment: 'blue_Rollout_100' 94 | timeoutInMinutes: 0 95 | environment: abtestenv 96 | strategy: 97 | runOnce: 98 | deploy: 99 | steps: 100 | - template: helm-upgrade-template.yml 101 | parameters: 102 | chartPath: '$(Pipeline.Workspace)/allcharts/abtest-istio' 103 | releaseName: 'abtest-istio' 104 | overrideValues: 'weight.blue=100,weight.green=0' 105 | 106 | - stage: 'Rollback' 107 | dependsOn: 'Blue_100' 108 | condition: failed() 109 | jobs: 110 | - deployment: 'Roll_Back' 111 | displayName: 'Roll Back after failure' 112 | environment: abtestenv 113 | strategy: 114 | runOnce: 115 | deploy: 116 | steps: 117 | - template: helm-upgrade-template.yml 118 | parameters: 119 | chartPath: '$(Pipeline.Workspace)/allcharts/abtest-istio' 120 | releaseName: 'abtest-istio' 121 | overrideValues: 'weight.blue=0,weight.green=100' 122 | 123 | - stage: 'Set_Production_Tag' 124 | dependsOn: 'Blue_100' 125 | condition: succeeded() 126 | jobs: 127 | - deployment: 'green_blue_tagging' 128 | timeoutInMinutes: 0 129 | environment: abtestenv 130 | strategy: 131 | runOnce: 132 | deploy: 133 | steps: 134 | - script: | 135 | IMAGE_LOCATION="$(cat $(Pipeline.Workspace)/image_location/image_location.txt)" 136 | echo "##vso[task.setvariable variable=IMAGE_LOCATION]$IMAGE_LOCATION" 137 | displayName: 'Get Image Location' 138 | - template: 
helm-upgrade-template.yml 139 | parameters: 140 | chartPath: '$(Pipeline.Workspace)/allcharts/abtest-model' 141 | releaseName: $(greenReleaseName) 142 | overrideValues: 'deployment.name=$(greenReleaseName),deployment.bluegreen=green,deployment.image.name=$(IMAGE_LOCATION)' 143 | 144 | - stage: 'Green_100' 145 | jobs: 146 | - job: 'Prod_Rollout_100' 147 | timeoutInMinutes: 0 148 | steps: 149 | - template: helm-upgrade-template.yml 150 | parameters: 151 | chartPath: '$(System.DefaultWorkingDirectory)/charts/abtest-istio' 152 | releaseName: 'abtest-istio' 153 | overrideValues: 'weight.blue=0,weight.green=100' 154 | 155 | - stage: 'Disable_blue' 156 | condition: always() 157 | jobs: 158 | - job: 'blue_disable' 159 | timeoutInMinutes: 0 160 | steps: 161 | - template: helm-install-template.yml 162 | - task: HelmDeploy@0 163 | displayName: 'helm uninstall blue' 164 | inputs: 165 | connectionType: 'Kubernetes Service Connection' 166 | kubernetesServiceConnection: $(K8S_AB_SERVICE_CONNECTION) 167 | command: delete 168 | arguments: $(blueReleaseName) --namespace $(K8S_AB_NAMESPACE) 169 | -------------------------------------------------------------------------------- /ml_service/util/env_variables.py: -------------------------------------------------------------------------------- 1 | """Env dataclass to load and hold all environment variables 2 | """ 3 | from dataclasses import dataclass 4 | import os 5 | from typing import Optional 6 | 7 | from dotenv import load_dotenv 8 | 9 | 10 | @dataclass(frozen=True) 11 | class Env: 12 | """Loads all environment variables into a predefined set of properties 13 | """ 14 | 15 | # to load .env file into environment variables for local execution 16 | load_dotenv() 17 | workspace_name: Optional[str] = os.environ.get("WORKSPACE_NAME") 18 | resource_group: Optional[str] = os.environ.get("RESOURCE_GROUP") 19 | subscription_id: Optional[str] = os.environ.get("SUBSCRIPTION_ID") 20 | tenant_id: Optional[str] = os.environ.get("TENANT_ID") 21 | 
app_id: Optional[str] = os.environ.get("SP_APP_ID") 22 | app_secret: Optional[str] = os.environ.get("SP_APP_SECRET") 23 | vm_size: Optional[str] = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") 24 | compute_name: Optional[str] = os.environ.get("AML_COMPUTE_CLUSTER_NAME") 25 | vm_priority: Optional[str] = os.environ.get( 26 | "AML_CLUSTER_PRIORITY", "lowpriority" 27 | ) # NOQA: E501 28 | min_nodes: int = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)) 29 | max_nodes: int = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)) 30 | build_id: Optional[str] = os.environ.get("BUILD_BUILDID") 31 | pipeline_name: Optional[str] = os.environ.get("TRAINING_PIPELINE_NAME") 32 | sources_directory_train: Optional[str] = os.environ.get( 33 | "SOURCES_DIR_TRAIN" 34 | ) # NOQA: E501 35 | train_script_path: Optional[str] = os.environ.get("TRAIN_SCRIPT_PATH") 36 | evaluate_script_path: Optional[str] = os.environ.get( 37 | "EVALUATE_SCRIPT_PATH" 38 | ) # NOQA: E501 39 | register_script_path: Optional[str] = os.environ.get( 40 | "REGISTER_SCRIPT_PATH" 41 | ) # NOQA: E501 42 | model_name: Optional[str] = os.environ.get("MODEL_NAME") 43 | experiment_name: Optional[str] = os.environ.get("EXPERIMENT_NAME") 44 | model_version: Optional[str] = os.environ.get("MODEL_VERSION") 45 | image_name: Optional[str] = os.environ.get("IMAGE_NAME") 46 | db_cluster_id: Optional[str] = os.environ.get("DB_CLUSTER_ID") 47 | score_script: Optional[str] = os.environ.get("SCORE_SCRIPT") 48 | build_uri: Optional[str] = os.environ.get("BUILD_URI") 49 | dataset_name: Optional[str] = os.environ.get("DATASET_NAME") 50 | datastore_name: Optional[str] = os.environ.get("DATASTORE_NAME") 51 | dataset_version: Optional[str] = os.environ.get("DATASET_VERSION") 52 | run_evaluation: Optional[str] = os.environ.get("RUN_EVALUATION", "true") 53 | allow_run_cancel: Optional[str] = os.environ.get( 54 | "ALLOW_RUN_CANCEL", "true" 55 | ) # NOQA: E501 56 | aml_env_name: Optional[str] = os.environ.get("AML_ENV_NAME") 57 | 
aml_env_train_conda_dep_file: Optional[str] = os.environ.get( 58 | "AML_ENV_TRAIN_CONDA_DEP_FILE", "conda_dependencies.yml" 59 | ) 60 | rebuild_env: Optional[bool] = os.environ.get( 61 | "AML_REBUILD_ENVIRONMENT", "false" 62 | ).lower().strip() == "true" 63 | 64 | use_gpu_for_scoring: Optional[bool] = os.environ.get( 65 | "USE_GPU_FOR_SCORING", "false" 66 | ).lower().strip() == "true" 67 | aml_env_score_conda_dep_file: Optional[str] = os.environ.get( 68 | "AML_ENV_SCORE_CONDA_DEP_FILE", "conda_dependencies_scoring.yml" 69 | ) 70 | aml_env_scorecopy_conda_dep_file: Optional[str] = os.environ.get( 71 | "AML_ENV_SCORECOPY_CONDA_DEP_FILE", "conda_dependencies_scorecopy.yml" 72 | ) 73 | vm_size_scoring: Optional[str] = os.environ.get( 74 | "AML_COMPUTE_CLUSTER_CPU_SKU_SCORING" 75 | ) 76 | compute_name_scoring: Optional[str] = os.environ.get( 77 | "AML_COMPUTE_CLUSTER_NAME_SCORING" 78 | ) 79 | vm_priority_scoring: Optional[str] = os.environ.get( 80 | "AML_CLUSTER_PRIORITY_SCORING", "lowpriority" 81 | ) 82 | min_nodes_scoring: int = int( 83 | os.environ.get("AML_CLUSTER_MIN_NODES_SCORING", 0) 84 | ) # NOQA: E501 85 | max_nodes_scoring: int = int( 86 | os.environ.get("AML_CLUSTER_MAX_NODES_SCORING", 4) 87 | ) # NOQA: E501 88 | rebuild_env_scoring: Optional[bool] = os.environ.get( 89 | "AML_REBUILD_ENVIRONMENT_SCORING", "false" 90 | ).lower().strip() == "true" 91 | scoring_datastore_storage_name: Optional[str] = os.environ.get( 92 | "SCORING_DATASTORE_STORAGE_NAME" 93 | ) 94 | scoring_datastore_access_key: Optional[str] = os.environ.get( 95 | "SCORING_DATASTORE_ACCESS_KEY" 96 | ) 97 | scoring_datastore_input_container: Optional[str] = os.environ.get( 98 | "SCORING_DATASTORE_INPUT_CONTAINER" 99 | ) 100 | scoring_datastore_input_filename: Optional[str] = os.environ.get( 101 | "SCORING_DATASTORE_INPUT_FILENAME" 102 | ) 103 | scoring_datastore_output_container: Optional[str] = os.environ.get( 104 | "SCORING_DATASTORE_OUTPUT_CONTAINER" 105 | ) 106 | 
scoring_datastore_output_filename: Optional[str] = os.environ.get( 107 | "SCORING_DATASTORE_OUTPUT_FILENAME" 108 | ) 109 | scoring_dataset_name: Optional[str] = os.environ.get( 110 | "SCORING_DATASET_NAME" 111 | ) # NOQA: E501 112 | scoring_pipeline_name: Optional[str] = os.environ.get( 113 | "SCORING_PIPELINE_NAME" 114 | ) # NOQA: E501 115 | aml_env_name_scoring: Optional[str] = os.environ.get( 116 | "AML_ENV_NAME_SCORING" 117 | ) # NOQA: E501 118 | aml_env_name_score_copy: Optional[str] = os.environ.get( 119 | "AML_ENV_NAME_SCORE_COPY" 120 | ) # NOQA: E501 121 | batchscore_script_path: Optional[str] = os.environ.get( 122 | "BATCHSCORE_SCRIPT_PATH" 123 | ) # NOQA: E501 124 | batchscore_copy_script_path: Optional[str] = os.environ.get( 125 | "BATCHSCORE_COPY_SCRIPT_PATH" 126 | ) # NOQA: E501 127 | -------------------------------------------------------------------------------- /diabetes_regression/scoring/parallel_batchscore.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) Microsoft Corporation. All rights reserved.​ 3 | ​ 4 | Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, 5 | royalty-free right to use, copy, and modify the software code provided by us 6 | ("Software Code"). You may not sublicense the Software Code or any use of it 7 | (except to your affiliates and to vendors to perform work on your behalf) 8 | through distribution, network access, service agreement, lease, rental, or 9 | otherwise. This license does not purport to express any claim of ownership over 10 | data you may have shared with Microsoft in the creation of the Software Code. 11 | Unless applicable law gives you more rights, Microsoft reserves all other 12 | rights not expressly granted herein, whether by implication, estoppel or 13 | otherwise. 
​ 14 | ​ 15 | THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 21 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 22 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 25 | """ 26 | 27 | import numpy as np 28 | import pandas as pd 29 | import joblib 30 | import sys 31 | from typing import List 32 | from util.model_helper import get_model 33 | from azureml.core import Model 34 | 35 | model = None 36 | 37 | 38 | def parse_args() -> List[str]: 39 | """ 40 | The AML pipeline calls this file with a set of additional command 41 | line arguments whose names are not documented. As such using the 42 | ArgumentParser which necessitates that we supply the names of the 43 | arguments is risky should those undocumented names change. Hence 44 | we parse the arguments manually. 
45 | 46 | :returns: List of model filters 47 | 48 | :raises: ValueError 49 | """ 50 | model_name_param = [ 51 | (sys.argv[idx], sys.argv[idx + 1]) 52 | for idx, itm in enumerate(sys.argv) 53 | if itm == "--model_name" 54 | ] 55 | 56 | if len(model_name_param) == 0: 57 | raise ValueError( 58 | "Model name is required but no model name parameter was passed to the script" # NOQA: E501 59 | ) 60 | 61 | model_name = model_name_param[0][1] 62 | 63 | model_version_param = [ 64 | (sys.argv[idx], sys.argv[idx + 1]) 65 | for idx, itm in enumerate(sys.argv) 66 | if itm == "--model_version" 67 | ] 68 | model_version = ( 69 | None 70 | if len(model_version_param) < 1 71 | or len(model_version_param[0][1].strip()) == 0 # NOQA: E501 72 | else model_version_param[0][1] 73 | ) 74 | 75 | model_tag_name_param = [ 76 | (sys.argv[idx], sys.argv[idx + 1]) 77 | for idx, itm in enumerate(sys.argv) 78 | if itm == "--model_tag_name" 79 | ] 80 | model_tag_name = ( 81 | None 82 | if len(model_tag_name_param) < 1 83 | or len(model_tag_name_param[0][1].strip()) == 0 # NOQA: E501 84 | else model_tag_name_param[0][1] 85 | ) 86 | 87 | model_tag_value_param = [ 88 | (sys.argv[idx], sys.argv[idx + 1]) 89 | for idx, itm in enumerate(sys.argv) 90 | if itm == "--model_tag_value" 91 | ] 92 | model_tag_value = ( 93 | None 94 | if len(model_tag_value_param) < 1 95 | or len(model_tag_name_param[0][1].strip()) == 0 96 | else model_tag_value_param[0][1] 97 | ) 98 | 99 | return [model_name, model_version, model_tag_name, model_tag_value] 100 | 101 | 102 | def init(): 103 | """ 104 | Initializer called once per node that runs the scoring job. Parse command 105 | line arguments and get the right model to use for scoring. 
106 | """ 107 | try: 108 | print("Initializing batch scoring script...") 109 | 110 | # Get the model using name/version/tags filter 111 | model_filter = parse_args() 112 | amlmodel = get_model( 113 | model_name=model_filter[0], 114 | model_version=model_filter[1], 115 | tag_name=model_filter[2], 116 | tag_value=model_filter[3]) 117 | 118 | # Load the model using name/version found 119 | global model 120 | modelpath = Model.get_model_path( 121 | model_name=amlmodel.name, version=amlmodel.version) 122 | model = joblib.load(modelpath) 123 | print("Loaded model {}".format(model_filter[0])) 124 | except Exception as ex: 125 | print("Error: {}".format(ex)) 126 | 127 | 128 | def run(mini_batch: pd.DataFrame) -> pd.DataFrame: 129 | """ 130 | The run method is called multiple times by the runtime. Each time 131 | a mini-batch consisting of a portion of the input data is passed 132 | in as a pandas DataFrame. The run method should return the scoring 133 | results as a List or a pandas DataFrame. 134 | 135 | :param mini_batch: Dataframe containing a portion of the scoring data 136 | 137 | :returns: array containing the scores. 
138 | """ 139 | 140 | try: 141 | result = None 142 | 143 | for _, sample in mini_batch.iterrows(): 144 | # prediction 145 | pred = model.predict(sample.values.reshape(1, -1)) 146 | result = ( 147 | np.array(pred) if result is None else np.vstack((result, pred)) 148 | ) # NOQA: E501 149 | 150 | return ( 151 | [] 152 | if result is None 153 | else mini_batch.join(pd.DataFrame(result, columns=["score"])) 154 | ) 155 | 156 | except Exception as ex: 157 | print(ex) 158 | -------------------------------------------------------------------------------- /experimentation/Diabetes Ridge Regression Parameter Experimentation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Experiment with parameters for a Ridge Regression Model on the Diabetes Dataset" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This notebook is for experimenting with different parameters to train a ridge regression model on the Diabetes dataset." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Change out of the experimentation directory\n", 24 | "%cd .." 
25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import azureml.core\n", 34 | "from azureml.core import Workspace" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Load the workspace from the saved config file\n", 44 | "ws = Workspace.from_config()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "import os, shutil\n", 54 | "\n", 55 | "# Create a folder for the experiment files\n", 56 | "training_folder = 'diabetes-training'\n", 57 | "os.makedirs(training_folder, exist_ok=True)\n", 58 | "\n", 59 | "# Copy the data file into the experiment folder\n", 60 | "shutil.copy('data/diabetes.csv', os.path.join(training_folder, \"diabetes.csv\"))\n", 61 | "\n", 62 | "# Copy the train functions into the experiment folder\n", 63 | "shutil.copy('diabetes_regression/training/train.py', os.path.join(training_folder, \"train.py\"))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "%%writefile $training_folder/parameters.json\n", 73 | "{\n", 74 | " \"training\":\n", 75 | " {\n", 76 | " \"alpha\": 0.3\n", 77 | " },\n", 78 | " \"evaluation\":\n", 79 | " {\n", 80 | "\n", 81 | " },\n", 82 | " \"scoring\":\n", 83 | " {\n", 84 | " \n", 85 | " }\n", 86 | "}\n" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "%%writefile $training_folder/diabetes_training.py\n", 96 | "# Import libraries\n", 97 | "from azureml.core import Run\n", 98 | "import json\n", 99 | "import os\n", 100 | "import pandas as pd\n", 101 | "import shutil\n", 102 | "\n", 103 | "from train import split_data, train_model\n", 104 | "\n", 105 | "# Get the experiment run 
context\n", 106 | "run = Run.get_context()\n", 107 | "\n", 108 | "# load the diabetes dataset\n", 109 | "print(\"Loading Data...\")\n", 110 | "train_df = pd.read_csv('diabetes.csv')\n", 111 | "\n", 112 | "data = split_data(train_df)\n", 113 | "\n", 114 | "# Specify the parameters to test\n", 115 | "with open(\"parameters.json\") as f:\n", 116 | " pars = json.load(f)\n", 117 | " train_args = pars[\"training\"]\n", 118 | "\n", 119 | "# Log parameters\n", 120 | "for k, v in train_args.items():\n", 121 | " run.log(k, v)\n", 122 | "\n", 123 | "model, metrics = train_model(data, train_args)\n", 124 | "\n", 125 | "# Log metrics\n", 126 | "for k, v in metrics.items():\n", 127 | " run.log(k, v)\n", 128 | "\n", 129 | "# Save the parameters file to the outputs folder\n", 130 | "os.makedirs('outputs', exist_ok=True)\n", 131 | "shutil.copy('parameters.json', os.path.join('outputs', 'parameters.json'))\n", 132 | " \n", 133 | "run.complete()" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "from azureml.train.estimator import Estimator\n", 143 | "from azureml.core import Experiment\n", 144 | "\n", 145 | "# Create an estimator\n", 146 | "estimator = Estimator(source_directory=training_folder,\n", 147 | " entry_script='diabetes_training.py',\n", 148 | " compute_target='local',\n", 149 | " conda_packages=['scikit-learn']\n", 150 | " )\n", 151 | "\n", 152 | "# Create an experiment\n", 153 | "experiment_name = 'diabetes-training'\n", 154 | "experiment = Experiment(workspace = ws, name = experiment_name)\n", 155 | "\n", 156 | "# Run the experiment based on the estimator\n", 157 | "run = experiment.submit(config=estimator)\n", 158 | "run.wait_for_completion(show_output=True)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "metrics = run.get_metrics()\n", 168 | "for k, v in 
metrics.items():\n", 169 | " print(k, v)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "for file in run.get_file_names():\n", 179 | " print(file)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3.6.10 64-bit ('OH3': conda)", 193 | "language": "python", 194 | "name": "python361064bitoh3conda5f7beeba8c1d407187c86667ecfb684f" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.6.10" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 2 211 | } 212 | -------------------------------------------------------------------------------- /diabetes_regression/evaluate/evaluate_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) Microsoft Corporation. All rights reserved.​ 3 | ​ 4 | Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, 5 | royalty-free right to use, copy, and modify the software code provided by us 6 | ("Software Code"). You may not sublicense the Software Code or any use of it 7 | (except to your affiliates and to vendors to perform work on your behalf) 8 | through distribution, network access, service agreement, lease, rental, or 9 | otherwise. This license does not purport to express any claim of ownership over 10 | data you may have shared with Microsoft in the creation of the Software Code. 
11 | Unless applicable law gives you more rights, Microsoft reserves all other 12 | rights not expressly granted herein, whether by implication, estoppel or 13 | otherwise. ​ 14 | ​ 15 | THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 21 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 22 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 25 | """ 26 | from azureml.core import Run 27 | import argparse 28 | import traceback 29 | from util.model_helper import get_model 30 | 31 | run = Run.get_context() 32 | 33 | # if you would like to run this script on a local computer 34 | # the following code is a good starting point for you 35 | # use 36 | # python -m evaluate.evaluate_model 37 | # in diabetes_regression folder context 38 | 39 | # if (run.id.startswith('OfflineRun')): 40 | # from dotenv import load_dotenv 41 | # # For local development, set values in this section 42 | # load_dotenv() 43 | # sources_dir = os.environ.get("SOURCES_DIR_TRAIN") 44 | # if (sources_dir is None): 45 | # sources_dir = 'diabetes_regression' 46 | # path_to_util = os.path.join(".", sources_dir, "util") 47 | # sys.path.append(os.path.abspath(path_to_util)) # NOQA: E402 48 | # from model_helper import get_model 49 | # workspace_name = os.environ.get("WORKSPACE_NAME") 50 | # experiment_name = os.environ.get("EXPERIMENT_NAME") 51 | # resource_group = os.environ.get("RESOURCE_GROUP") 52 | # 
subscription_id = os.environ.get("SUBSCRIPTION_ID") 53 | # tenant_id = os.environ.get("TENANT_ID") 54 | # model_name = os.environ.get("MODEL_NAME") 55 | # app_id = os.environ.get('SP_APP_ID') 56 | # app_secret = os.environ.get('SP_APP_SECRET') 57 | # build_id = os.environ.get('BUILD_BUILDID') 58 | # # run_id useful to query previous runs 59 | # run_id = "57fee47f-5ae8-441c-bc0c-d4c371f32d70" 60 | 61 | # aml_workspace = Workspace.get( 62 | # name=workspace_name, 63 | # subscription_id=subscription_id, 64 | # resource_group=resource_group 65 | # ) 66 | # ws = aml_workspace 67 | # exp = Experiment(ws, experiment_name) 68 | 69 | # comment the following three lines 70 | # if you would like to use Offline mode 71 | exp = run.experiment 72 | ws = run.experiment.workspace 73 | run_id = 'amlcompute' 74 | 75 | parser = argparse.ArgumentParser("evaluate") 76 | 77 | parser.add_argument( 78 | "--run_id", 79 | type=str, 80 | help="Training run ID", 81 | ) 82 | parser.add_argument( 83 | "--model_name", 84 | type=str, 85 | help="Name of the Model", 86 | default="diabetes_model.pkl", 87 | ) 88 | 89 | parser.add_argument( 90 | "--allow_run_cancel", 91 | type=str, 92 | help="Set this to false to avoid evaluation step from cancelling run after an unsuccessful evaluation", # NOQA: E501 93 | default="true", 94 | ) 95 | 96 | args = parser.parse_args() 97 | if (args.run_id is not None): 98 | run_id = args.run_id 99 | if (run_id == 'amlcompute'): 100 | run_id = run.parent.id 101 | model_name = args.model_name 102 | metric_eval = "mse" 103 | 104 | allow_run_cancel = args.allow_run_cancel 105 | # Parameterize the matrices on which the models should be compared 106 | # Add golden data set on which all the model performance can be evaluated 107 | try: 108 | firstRegistration = False 109 | tag_name = 'experiment_name' 110 | 111 | model = get_model( 112 | model_name=model_name, 113 | tag_name=tag_name, 114 | tag_value=exp.name, 115 | aml_workspace=ws) 116 | 117 | if (model is not None): 118 | 
production_model_mse = 10000 119 | if (metric_eval in model.tags): 120 | production_model_mse = float(model.tags[metric_eval]) 121 | new_model_mse = float(run.parent.get_metrics().get(metric_eval)) 122 | if (production_model_mse is None or new_model_mse is None): 123 | print("Unable to find", metric_eval, "metrics, " 124 | "exiting evaluation") 125 | if((allow_run_cancel).lower() == 'true'): 126 | run.parent.cancel() 127 | else: 128 | print( 129 | "Current Production model mse: {}, " 130 | "New trained model mse: {}".format( 131 | production_model_mse, new_model_mse 132 | ) 133 | ) 134 | 135 | if (new_model_mse < production_model_mse): 136 | print("New trained model performs better, " 137 | "thus it should be registered") 138 | else: 139 | print("New trained model metric is worse than or equal to " 140 | "production model so skipping model registration.") 141 | if((allow_run_cancel).lower() == 'true'): 142 | run.parent.cancel() 143 | else: 144 | print("This is the first model, " 145 | "thus it should be registered") 146 | 147 | except Exception: 148 | traceback.print_exc(limit=None, file=None, chain=True) 149 | print("Something went wrong trying to evaluate. Exiting.") 150 | raise 151 | -------------------------------------------------------------------------------- /docs/custom_container.md: -------------------------------------------------------------------------------- 1 | # Customizing the Azure DevOps job container 2 | 3 | The Model training and deployment pipeline uses a Docker container 4 | on the Azure Pipelines agents to provide a reproducible environment 5 | to run test and deployment code. 6 | The image of the container 7 | `mcr.microsoft.com/mlops/python:latest` is built with this 8 | [Dockerfile](../environment_setup/Dockerfile). 9 | 10 | Additionally mcr.microsoft.com/mlops/python image is also tagged with below tags. 
11 | 12 | | Image Tags | Description | 13 | | ----------------------------------------------- | :---------------------------------------------------------------------------------------- | 14 | | mcr.microsoft.com/mlops/python:latest | latest image | 15 | | mcr.microsoft.com/mlops/python:build-[id] | where [id] is Azure Devops build id e.g. mcr.microsoft.com/mlops/python:build-20200325.1 | 16 | | mcr.microsoft.com/mlops/python:amlsdk-[version] | where [version] is aml sdk version e.g. mcr.microsoft.com/mlops/python:amlsdk-1.1.5.1 | 17 | | mcr.microsoft.com/mlops/python:release-[id] | where [id] is github release id e.g. mcr.microsoft.com/mlops/python:release-3.0.0 | | 18 | 19 | In your project you will want to build your own 20 | Docker image that only contains the dependencies and tools required for your 21 | use case. This image will be more likely smaller and therefore faster, and it 22 | will be totally maintained by your team. 23 | 24 | ## Provision an Azure Container Registry 25 | 26 | An Azure Container Registry is deployed along your Azure ML Workspace to manage models. 27 | You can use that registry instance to store your MLOps container image as well, or 28 | provision a separate instance. 29 | 30 | ## Create a Registry Service Connection 31 | 32 | [Create a service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml#sep-docreg) to your Azure Container Registry: 33 | 34 | - As *Connection type*, select *Docker Registry* 35 | - As *Registry type*, select *Azure Container Registry* 36 | - As *Azure container registry*, select your Container registry instance 37 | - As *Service connection name*, enter `acrconnection` 38 | 39 | ## Update the environment definition 40 | 41 | Modify the [Dockerfile](../environment_setup/Dockerfile) and/or the 42 | [ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) CI Conda 43 | environment definition to tailor your environment. 
44 | Conda provides a [reusable environment for training and deployment with Azure Machine Learning](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-environments). 45 | The Conda environment used for CI should use the same package versions as the Conda environment 46 | used for the Azure ML training and scoring environments (defined in [conda_dependencies.yml](../diabetes_regression/conda_dependencies.yml)). 47 | This enables you to run unit and integration tests using the exact same dependencies as used in the ML pipeline. 48 | 49 | If a package is available in a Conda package repository, then we recommend that 50 | you use the Conda installation rather than the pip installation. Conda packages 51 | typically come with prebuilt binaries that make installation more reliable. 52 | 53 | ## Create a container build pipeline 54 | 55 | In your [Azure DevOps](https://dev.azure.com) project create a new build 56 | pipeline referring to the 57 | [environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) 58 | pipeline definition in your forked repository. 59 | 60 | Edit the [environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) file 61 | and modify the string `'public/mlops/python'` with a name suitable to describe your environment, 62 | e.g. `'mlops/diabetes_regression'`. 63 | 64 | Save and run the pipeline, making sure to set these runtime variables: `amlsdkversion` and `githubrelease`. The values are up to you to set depending on your environment. These will show as tags on your image. 65 | 66 | ![Custom Container Vars](./images/custom-container-variables.png) 67 | 68 | This will build and push a container image to your Azure Container Registry with 69 | the name you have just edited. The next step is to modify the build pipeline to run the CI job on a container 70 | run from that image. 
71 | 72 | ## Modify the model pipeline 73 | 74 | Modify the model pipeline file [diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml) by replacing this section: 75 | 76 | ``` 77 | resources: 78 | containers: 79 | - container: mlops 80 | image: mcr.microsoft.com/mlops/python:latest 81 | ``` 82 | 83 | with (using the image name previously defined): 84 | 85 | ``` 86 | resources: 87 | containers: 88 | - container: mlops 89 | image: mlops/diabetes_regression 90 | endpoint: acrconnection 91 | ``` 92 | 93 | Run the pipeline and ensure your container has been used. 94 | 95 | ## Addressing conflicting dependencies 96 | 97 | Especially when working in a team, it's possible for environment changes across branches to interfere with one another. 98 | 99 | For example, if the master branch is using scikit-learn and you create a branch to use Tensorflow instead, and you 100 | decide to remove scikit-learn from the 101 | [ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) Conda environment definition 102 | and run the [docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) Docker image, 103 | then the master branch will stop building. 104 | 105 | You could leave scikit-learn in addition to Tensorflow in the environment, but that is not ideal, as you would have to take an extra step to remove scikit-learn after merging your branch to master. 106 | 107 | A better approach would be to use a distinct name for your modified environment, such as `mlops/diabetes_regression/tensorflow`. 
108 | By changing the name of the image in your branch in both the container build pipeline 109 | [environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) 110 | and the model pipeline file 111 | [diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml), 112 | and running both pipelines in sequence on your branch, 113 | you avoid any branch conflicts, and the name does not have to be changed after merging to master. 114 | -------------------------------------------------------------------------------- /diabetes_regression/training/train_aml.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) Microsoft Corporation. All rights reserved.​ 3 | ​ 4 | Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, 5 | royalty-free right to use, copy, and modify the software code provided by us 6 | ("Software Code"). You may not sublicense the Software Code or any use of it 7 | (except to your affiliates and to vendors to perform work on your behalf) 8 | through distribution, network access, service agreement, lease, rental, or 9 | otherwise. This license does not purport to express any claim of ownership over 10 | data you may have shared with Microsoft in the creation of the Software Code. 11 | Unless applicable law gives you more rights, Microsoft reserves all other 12 | rights not expressly granted herein, whether by implication, estoppel or 13 | otherwise. ​ 14 | ​ 15 | THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
from azureml.core.run import Run
from azureml.core import Dataset, Datastore, Workspace
import os
import argparse
import joblib
import json
from train import split_data, train_model, get_model_metrics


def register_dataset(
    aml_workspace: Workspace,
    dataset_name: str,
    datastore_name: str,
    file_path: str
) -> Dataset:
    """Register (a new version of) a tabular dataset from a delimited file.

    Reads the file at ``file_path`` on the named datastore and registers it
    under ``dataset_name``. ``create_new_version=True`` means repeated calls
    with the same name add a version instead of failing.
    """
    datastore = Datastore.get(aml_workspace, datastore_name)
    dataset = Dataset.Tabular.from_delimited_files(path=(datastore, file_path))
    dataset = dataset.register(workspace=aml_workspace,
                               name=dataset_name,
                               create_new_version=True)

    return dataset


def main():
    """Train-step entry point.

    Resolves the training dataset (by name/version, or by registering a new
    version from --data_file_path), trains and evaluates the model via the
    helpers imported from train.py, logs parameters and metrics to both the
    step run and its parent pipeline run, and writes the serialized model to
    --step_output for the next pipeline step (and to ./outputs for history).
    """
    print("Running train_aml.py")

    parser = argparse.ArgumentParser("train")
    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model",
        default="diabetes_model.pkl",
    )

    parser.add_argument(
        "--step_output",
        type=str,
        help=("output for passing data to next step")
    )

    parser.add_argument(
        "--dataset_version",
        type=str,
        help=("dataset version")
    )

    parser.add_argument(
        "--data_file_path",
        type=str,
        help=("data file path, if specified,\
               a new version of the dataset will be registered")
    )

    # caller_run_id is accepted (and echoed below) for traceability only;
    # it is not used elsewhere in this script.
    parser.add_argument(
        "--caller_run_id",
        type=str,
        help=("caller run id, for example ADF pipeline run id")
    )

    parser.add_argument(
        "--dataset_name",
        type=str,
        help=("Dataset name. Dataset must be passed by name\
              to always get the desired dataset version\
              rather than the one used while the pipeline creation")
    )

    args = parser.parse_args()

    print("Argument [model_name]: %s" % args.model_name)
    print("Argument [step_output]: %s" % args.step_output)
    print("Argument [dataset_version]: %s" % args.dataset_version)
    print("Argument [data_file_path]: %s" % args.data_file_path)
    print("Argument [caller_run_id]: %s" % args.caller_run_id)
    print("Argument [dataset_name]: %s" % args.dataset_name)

    model_name = args.model_name
    step_output_path = args.step_output
    dataset_version = args.dataset_version
    data_file_path = args.data_file_path
    dataset_name = args.dataset_name

    run = Run.get_context()

    print("Getting training parameters")

    # Load the training parameters from the parameters file
    # (assumes the step runs with the source directory as cwd, so
    # parameters.json resolves relative to it — TODO confirm).
    with open("parameters.json") as f:
        pars = json.load(f)
    try:
        train_args = pars["training"]
    except KeyError:
        # Missing "training" section is tolerated: train with defaults.
        print("Could not load training values from file")
        train_args = {}

    # Log the training parameters on both the step run and the parent
    # pipeline run so they are visible at either level in the UI.
    print(f"Parameters: {train_args}")
    for (k, v) in train_args.items():
        run.log(k, v)
        run.parent.log(k, v)

    # Get the dataset. "none" is the sentinel default of the
    # data_file_path pipeline parameter: left at the default, fetch the
    # registered dataset; otherwise register a new version from the file.
    if (dataset_name):
        if (data_file_path == 'none'):
            dataset = Dataset.get_by_name(run.experiment.workspace, dataset_name, dataset_version)  # NOQA: E402, E501
        else:
            dataset = register_dataset(run.experiment.workspace,
                                       dataset_name,
                                       os.environ.get("DATASTORE_NAME"),
                                       data_file_path)
    else:
        e = ("No dataset provided")
        print(e)
        raise Exception(e)

    # Link dataset to the step run so it is trackable in the UI
    run.input_datasets['training_data'] = dataset
    run.parent.tag("dataset_id", value=dataset.id)

    # Split the data into test/train
    df = dataset.to_pandas_dataframe()
    data = split_data(df)

    # Train the model
    model = train_model(data, train_args)

    # Evaluate and log the metrics returned from the train function
    metrics = get_model_metrics(model, data)
    for (k, v) in metrics.items():
        run.log(k, v)
        run.parent.log(k, v)

    # Pass model file to next step via the PipelineData output directory
    os.makedirs(step_output_path, exist_ok=True)
    model_output_path = os.path.join(step_output_path, model_name)
    joblib.dump(value=model, filename=model_output_path)

    # Also upload model file to run outputs for history
    os.makedirs('outputs', exist_ok=True)
    output_path = os.path.join('outputs', model_name)
    joblib.dump(value=model, filename=output_path)

    run.tag("run_type", value="train")
    print(f"tags now present for run: {run.tags}")

    run.complete()


if __name__ == '__main__':
    main()
dataset 144 | run.parent.tag("dataset_id", value=dataset.id) 145 | 146 | # Split the data into test/train 147 | df = dataset.to_pandas_dataframe() 148 | data = split_data(df) 149 | 150 | # Train the model 151 | model = train_model(data, train_args) 152 | 153 | # Evaluate and log the metrics returned from the train function 154 | metrics = get_model_metrics(model, data) 155 | for (k, v) in metrics.items(): 156 | run.log(k, v) 157 | run.parent.log(k, v) 158 | 159 | # Pass model file to next step 160 | os.makedirs(step_output_path, exist_ok=True) 161 | model_output_path = os.path.join(step_output_path, model_name) 162 | joblib.dump(value=model, filename=model_output_path) 163 | 164 | # Also upload model file to run outputs for history 165 | os.makedirs('outputs', exist_ok=True) 166 | output_path = os.path.join('outputs', model_name) 167 | joblib.dump(value=model, filename=output_path) 168 | 169 | run.tag("run_type", value="train") 170 | print(f"tags now present for run: {run.tags}") 171 | 172 | run.complete() 173 | 174 | 175 | if __name__ == '__main__': 176 | main() 177 | -------------------------------------------------------------------------------- /.pipelines/diabetes_regression-cd.yml: -------------------------------------------------------------------------------- 1 | # Continuous Integration (CI) pipeline that orchestrates the deployment of the diabetes_regression model. 2 | 3 | # Runtime parameters to select artifacts 4 | parameters: 5 | - name : artifactBuildId 6 | displayName: Model Train CI Build ID. Default is 'latest'. 
7 | type: string 8 | default: latest 9 | 10 | pr: none 11 | 12 | # Trigger this pipeline on model-train pipeline completion 13 | trigger: none 14 | resources: 15 | containers: 16 | - container: mlops 17 | image: mcr.microsoft.com/mlops/python:latest 18 | pipelines: 19 | - pipeline: model-train-ci 20 | source: Model-Train-Register-CI # Name of the triggering pipeline 21 | trigger: 22 | branches: 23 | include: 24 | - master 25 | 26 | variables: 27 | - template: diabetes_regression-variables-template.yml 28 | - group: devopsforai-aml-vg 29 | 30 | stages: 31 | - stage: 'Deploy_ACI' 32 | displayName: 'Deploy to ACI' 33 | condition: variables['ACI_DEPLOYMENT_NAME'] 34 | jobs: 35 | - job: "Deploy_ACI" 36 | displayName: "Deploy to ACI" 37 | container: mlops 38 | timeoutInMinutes: 0 39 | steps: 40 | - download: none 41 | - template: diabetes_regression-get-model-id-artifact-template.yml 42 | parameters: 43 | projectId: '$(resources.pipeline.model-train-ci.projectID)' 44 | pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)' 45 | artifactBuildId: ${{ parameters.artifactBuildId }} 46 | - task: AzureCLI@1 47 | displayName: 'Install AzureML CLI' 48 | inputs: 49 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 50 | scriptLocation: inlineScript 51 | workingDirectory: $(Build.SourcesDirectory) 52 | inlineScript: 'az extension add -n azure-cli-ml' 53 | - task: AzureCLI@1 54 | displayName: "Deploy to ACI (CLI)" 55 | inputs: 56 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 57 | scriptLocation: inlineScript 58 | workingDirectory: $(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring 59 | inlineScript: | 60 | set -e # fail on error 61 | 62 | az ml model deploy --name $(ACI_DEPLOYMENT_NAME) --model '$(MODEL_NAME):$(get_model.MODEL_VERSION)' \ 63 | --ic inference_config.yml \ 64 | --dc deployment_config_aci.yml \ 65 | -g $(RESOURCE_GROUP) --workspace-name $(WORKSPACE_NAME) \ 66 | --overwrite -v 67 | - task: AzureCLI@1 68 | displayName: 'Smoke test' 69 | inputs: 70 | 
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 71 | scriptLocation: inlineScript 72 | inlineScript: | 73 | set -e # fail on error 74 | export SUBSCRIPTION_ID=$(az account show --query id -o tsv) 75 | python -m ml_service.util.smoke_test_scoring_service --type ACI --service "$(ACI_DEPLOYMENT_NAME)" 76 | 77 | - stage: 'Deploy_AKS' 78 | displayName: 'Deploy to AKS' 79 | dependsOn: Deploy_ACI 80 | condition: and(succeeded(), variables['AKS_DEPLOYMENT_NAME']) 81 | jobs: 82 | - job: "Deploy_AKS" 83 | displayName: "Deploy to AKS" 84 | container: mlops 85 | timeoutInMinutes: 0 86 | steps: 87 | - template: diabetes_regression-get-model-id-artifact-template.yml 88 | parameters: 89 | projectId: '$(resources.pipeline.model-train-ci.projectID)' 90 | pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)' 91 | artifactBuildId: ${{ parameters.artifactBuildId }} 92 | - task: AzureCLI@1 93 | displayName: 'Install AzureML CLI' 94 | inputs: 95 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 96 | scriptLocation: inlineScript 97 | workingDirectory: $(Build.SourcesDirectory) 98 | inlineScript: 'az extension add -n azure-cli-ml' 99 | - task: AzureCLI@1 100 | displayName: "Deploy to AKS (CLI)" 101 | inputs: 102 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 103 | scriptLocation: inlineScript 104 | workingDirectory: $(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring 105 | inlineScript: | 106 | set -e # fail on error 107 | 108 | az ml model deploy --name $(AKS_DEPLOYMENT_NAME) --model '$(MODEL_NAME):$(get_model.MODEL_VERSION)' \ 109 | --compute-target $(AKS_COMPUTE_NAME) \ 110 | --ic inference_config.yml \ 111 | --dc deployment_config_aks.yml \ 112 | -g $(RESOURCE_GROUP) --workspace-name $(WORKSPACE_NAME) \ 113 | --overwrite -v 114 | - task: AzureCLI@1 115 | displayName: 'Smoke test' 116 | inputs: 117 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 118 | scriptLocation: inlineScript 119 | inlineScript: | 120 | set -e # fail on error 121 | export SUBSCRIPTION_ID=$(az 
account show --query id -o tsv) 122 | python -m ml_service.util.smoke_test_scoring_service --type AKS --service "$(AKS_DEPLOYMENT_NAME)" 123 | 124 | - stage: 'Deploy_Webapp' 125 | displayName: 'Deploy to Webapp' 126 | condition: variables['WEBAPP_DEPLOYMENT_NAME'] 127 | jobs: 128 | - job: "Deploy_Webapp" 129 | displayName: "Package and deploy model" 130 | container: mlops 131 | timeoutInMinutes: 0 132 | steps: 133 | - template: diabetes_regression-get-model-id-artifact-template.yml 134 | parameters: 135 | projectId: '$(resources.pipeline.model-train-ci.projectID)' 136 | pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)' 137 | artifactBuildId: ${{ parameters.artifactBuildId }} 138 | - template: diabetes_regression-package-model-template.yml 139 | parameters: 140 | modelId: $(MODEL_NAME):$(get_model.MODEL_VERSION) 141 | scoringScriptPath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/score.py' 142 | condaFilePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/conda_dependencies.yml' 143 | - script: echo $(IMAGE_LOCATION) >image_location.txt 144 | displayName: "Write image location file" 145 | - task: AzureWebAppContainer@1 146 | name: WebAppDeploy 147 | displayName: 'Azure Web App on Container Deploy' 148 | inputs: 149 | azureSubscription: '$(AZURE_RM_SVC_CONNECTION)' 150 | appName: '$(WEBAPP_DEPLOYMENT_NAME)' 151 | resourceGroupName: '$(RESOURCE_GROUP)' 152 | imageName: '$(IMAGE_LOCATION)' 153 | - task: AzureCLI@1 154 | displayName: 'Smoke test' 155 | inputs: 156 | azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' 157 | scriptLocation: inlineScript 158 | inlineScript: | 159 | set -e # fail on error 160 | export SUBSCRIPTION_ID=$(az account show --query id -o tsv) 161 | python -m ml_service.util.smoke_test_scoring_service --type Webapp --service "$(WebAppDeploy.AppServiceApplicationUrl)/score" 162 | -------------------------------------------------------------------------------- /ml_service/pipelines/diabetes_regression_build_train_pipeline.py: 
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.core import Workspace, Dataset, Datastore
from azureml.core.runconfig import RunConfiguration
from ml_service.pipelines.load_sample_data import create_sample_data_csv
from ml_service.util.attach_compute import get_compute
from ml_service.util.env_variables import Env
from ml_service.util.manage_environment import get_environment
import os


def main():
    """Build and publish the model training/retraining AML pipeline.

    Resolves workspace/compute/environment from Env(), bootstraps a sample
    diabetes dataset if none is registered yet, wires Train -> (Evaluate) ->
    Register steps, then validates and publishes the pipeline versioned by
    the build id.
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable Azure ML environment
    environment = get_environment(
        aml_workspace,
        e.aml_env_name,
        conda_dependencies_file=e.aml_env_train_conda_dep_file,
        create_new=e.rebuild_env,
    )
    run_config = RunConfiguration()
    run_config.environment = environment

    # Expose the datastore name to the training script (train_aml.py reads
    # DATASTORE_NAME from the environment when registering a new dataset
    # version).
    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name
    run_config.environment.environment_variables[
        "DATASTORE_NAME"
    ] = datastore_name  # NOQA: E501

    # Pipeline parameters overridable per submission. "none" is the
    # sentinel the training step compares against.
    model_name_param = PipelineParameter(name="model_name", default_value=e.model_name)  # NOQA: E501
    dataset_version_param = PipelineParameter(
        name="dataset_version", default_value=e.dataset_version
    )
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value="none"
    )
    caller_run_id_param = PipelineParameter(name="caller_run_id", default_value="none")  # NOQA: E501

    # Get dataset name
    dataset_name = e.dataset_name

    # Check to see if dataset exists
    if dataset_name not in aml_workspace.datasets:
        # This call creates an example CSV from sklearn sample data. If you
        # have already bootstrapped your project, you can comment this line
        # out and use your own CSV.
        create_sample_data_csv()

        # Use a CSV to read in the data set.
        file_name = "diabetes.csv"

        if not os.path.exists(file_name):
            raise Exception(
                'Could not find CSV dataset at "%s". If you have bootstrapped your project, you will need to provide a CSV.'  # NOQA: E501
                % file_name
            )  # NOQA: E501

        # Upload file to default datastore in workspace
        # (fixed: local variable was misspelled "datatstore")
        datastore = Datastore.get(aml_workspace, datastore_name)
        target_path = "training-data/"
        datastore.upload_files(
            files=[file_name],
            target_path=target_path,
            overwrite=True,
            show_progress=False,
        )

        # Register dataset
        path_on_datastore = os.path.join(target_path, file_name)
        dataset = Dataset.Tabular.from_delimited_files(
            path=(datastore, path_on_datastore)
        )
        dataset = dataset.register(
            workspace=aml_workspace,
            name=dataset_name,
            description="diabetes training data",
            tags={"format": "CSV"},
            create_new_version=True,
        )

    # Create a PipelineData to pass data between steps
    pipeline_data = PipelineData(
        "pipeline_data", datastore=aml_workspace.get_default_datastore()
    )

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        outputs=[pipeline_data],
        arguments=[
            "--model_name",
            model_name_param,
            "--step_output",
            pipeline_data,
            "--dataset_version",
            dataset_version_param,
            "--data_file_path",
            data_file_path_param,
            "--caller_run_id",
            caller_run_id_param,
            "--dataset_name",
            dataset_name,
        ],
        runconfig=run_config,
        allow_reuse=True,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--model_name",
            model_name_param,
            "--allow_run_cancel",
            e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[pipeline_data],
        arguments=["--model_name", model_name_param, "--step_input", pipeline_data, ],  # NOQA: E501
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")

    # Check run_evaluation flag to include or exclude evaluation step.
    if (e.run_evaluation).lower() == "true":
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    # (fixed: removed bare `train_pipeline._set_experiment_name`, which
    # only accessed the attribute without calling it — a no-op)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id,
    )
    print(f"Published pipeline: {published_pipeline.name}")
    print(f"for build {published_pipeline.version}")


if __name__ == "__main__":
    main()
normDir) 34 | for filename in os.listdir(dirpath): 35 | if(filename.find(strtoreplace) != -1): 36 | src = os.path.join(self._project_directory, normDir, filename) # NOQA: E501 37 | dst = os.path.join(self._project_directory, 38 | normDir, 39 | filename.replace(strtoreplace, self._project_name, 1)) # NOQA: E501 40 | os.rename(src, dst) 41 | 42 | def rename_dir(self): 43 | dir = "diabetes_regression" 44 | src = os.path.join(self._project_directory, dir) 45 | for path, subdirs, files in os.walk(src): 46 | for name in files: 47 | newPath = path.replace(dir, self._project_name) 48 | if (not (os.path.exists(newPath))): 49 | os.mkdir(newPath) 50 | file_path = os.path.join(path, name) 51 | new_name = os.path.join(newPath, name) 52 | os.rename(file_path, new_name) 53 | 54 | def delete_dir(self): 55 | # Delete unwanted directories 56 | dirs = ["docs", r"diabetes_regression"] 57 | if (platform.system() == "Windows"): 58 | cmd = 'rmdir /S /Q "{}"' 59 | else: 60 | cmd = 'rm -r "{}"' 61 | for dir in dirs: 62 | os.system(cmd.format(os.path.join(self._project_directory, os.path.normpath(dir)))) # NOQA: E501 63 | 64 | def clean_dir(self): 65 | # Clean up directories 66 | dirs = ["data", "experimentation"] 67 | for dir in dirs: 68 | for root, dirs, files in os.walk(os.path.join(self._project_directory, dir)): # NOQA: E501 69 | for file in files: 70 | os.remove(os.path.join(root, file)) 71 | 72 | def validate_args(self): 73 | # Validate arguments 74 | if (os.path.isdir(self._project_directory) is False): 75 | raise Exception("Not a valid directory. Please provide an absolute directory path.") # NOQA: E501 76 | if (len(self._project_name) < 3 or len(self._project_name) > 15): 77 | raise Exception("Invalid project name length. Project name should be 3 to 15 chars long, letters and underscores only.") # NOQA: E501 78 | if (not re.search("^[\\w_]+$", self._project_name)): 79 | raise Exception("Invalid characters in project name. 
def replace_project_name(project_dir, project_name, rename_name):
    """Replace every occurrence of ``rename_name`` with ``project_name``
    inside the fixed list of template files below.

    Raises IOError (with a hint printed) if any expected file is missing,
    e.g. when the MLOpsPython repo is not cloned at ``project_dir``.
    """
    files = [r".env.example",
             r".pipelines/code-quality-template.yml",
             r".pipelines/pr.yml",
             r".pipelines/diabetes_regression-cd.yml",
             r".pipelines/diabetes_regression-ci.yml",
             r".pipelines/abtest.yml",
             r".pipelines/diabetes_regression-ci-image.yml",
             r".pipelines/diabetes_regression-publish-model-artifact-template.yml",  # NOQA: E501
             r".pipelines/diabetes_regression-get-model-id-artifact-template.yml",  # NOQA: E501
             r".pipelines/diabetes_regression-batchscoring-ci.yml",
             r".pipelines/diabetes_regression-variables-template.yml",
             r"environment_setup/Dockerfile",
             r"environment_setup/install_requirements.sh",
             r"ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py",  # NOQA: E501
             r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py",  # NOQA: E501
             r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py",  # NOQA: E501
             r"ml_service/pipelines/diabetes_regression_build_train_pipeline.py",  # NOQA: E501
             r"ml_service/util/create_scoring_image.py",
             r"diabetes_regression/conda_dependencies.yml",
             r"diabetes_regression/evaluate/evaluate_model.py",
             r"diabetes_regression/register/register_model.py",
             r"diabetes_regression/training/test_train.py"]

    for file in files:
        path = os.path.join(project_dir, os.path.normpath(file))
        try:
            with open(path, "rt", encoding="utf8") as f_in:
                data = f_in.read()
            data = data.replace(rename_name, project_name)
            # write back to the same normalized path that was read
            # (previously joined the raw, un-normalized file name)
            with open(path, "wt", encoding="utf8") as f_out:
                f_out.write(data)
        except IOError as e:
            print("Could not modify \"%s\". Is the MLOpsPython repo already cloned at \"%s\"?" % (path, project_dir))  # NOQA: E501
            raise e


def main(args):
    """Bootstrap a new project from the MLOpsPython template.

    Returns a process exit code: 0 on success, 1 on any failure
    (previously errors were printed but the script still exited 0).
    """
    parser = argparse.ArgumentParser(description='New Template')
    parser.add_argument("-d",
                        "--directory",
                        type=str,
                        required=True,
                        help="Absolute path to new project direcory")
    parser.add_argument("-n",
                        "--name",
                        type=str,
                        required=True,
                        help="Name of the project [3-15 chars, letters and underscores only]")  # NOQA: E501
    try:
        args = parser.parse_args()

        project_directory = args.directory
        project_name = args.name

        helper = Helper(project_directory, project_name)
        helper.validate_args()
        helper.clean_dir()

        replace_project_name(project_directory, project_name, "diabetes_regression")  # NOQA: E501
        replace_project_name(project_directory, project_name, "diabetes")

        helper.rename_files()
        helper.rename_dir()
        helper.delete_dir()
    except Exception as e:
        print(e)
        return 1

    return 0


if '__main__' == __name__:
    sys.exit(main(sys.argv))
This license does not purport to express any claim of ownership over 10 | data you may have shared with Microsoft in the creation of the Software Code. 11 | Unless applicable law gives you more rights, Microsoft reserves all other 12 | rights not expressly granted herein, whether by implication, estoppel or 13 | otherwise. ​ 14 | ​ 15 | THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 21 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 22 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 
import json
import os
import sys
import argparse
import traceback
import joblib
from azureml.core import Run, Experiment, Workspace, Dataset
from azureml.core.model import Model as AMLModel


def main():
    """Register-step entry point.

    Resolves the run context (offline local run via .env, or an AML
    compute run), loads the trained model produced by the train step from
    --step_input, collects metric tags from the parent pipeline run, and
    registers the model — attaching BuildId/BuildUri tags when the parent
    run carries them.
    """

    run = Run.get_context()
    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        # For local development, set values in this section
        load_dotenv()
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        # run_id useful to query previous runs
        run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
        aml_workspace = Workspace.get(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group
        )
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        ws = run.experiment.workspace
        exp = run.experiment
        # 'amlcompute' is a sentinel replaced by the parent run id below
        # unless an explicit --run_id is supplied.
        run_id = 'amlcompute'

    parser = argparse.ArgumentParser("register")

    parser.add_argument(
        "--run_id",
        type=str,
        help="Training run ID",
    )

    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model",
        default="diabetes_model.pkl",
    )

    parser.add_argument(
        "--step_input",
        type=str,
        help=("input from previous steps")
    )

    args = parser.parse_args()
    if (args.run_id is not None):
        run_id = args.run_id
    if (run_id == 'amlcompute'):
        run_id = run.parent.id
    model_name = args.model_name
    model_path = args.step_input

    print("Getting registration parameters")

    # Load the registration parameters from the parameters file
    # (assumes cwd is the source directory so parameters.json resolves
    # relative to it — TODO confirm).
    with open("parameters.json") as f:
        pars = json.load(f)
    try:
        register_args = pars["registration"]
    except KeyError:
        print("Could not load registration values from file")
        register_args = {"tags": []}

    # For each configured tag name, copy the matching metric value from
    # the parent pipeline run onto the model tags (missing metrics are
    # tolerated and only reported).
    model_tags = {}
    for tag in register_args["tags"]:
        try:
            mtag = run.parent.get_metrics()[tag]
            model_tags[tag] = mtag
        except KeyError:
            print(f"Could not find {tag} metric on parent run.")

    # load the model
    print("Loading model from " + model_path)
    model_file = os.path.join(model_path, model_name)
    model = joblib.load(model_file)
    parent_tags = run.parent.get_tags()
    try:
        build_id = parent_tags["BuildId"]
    except KeyError:
        build_id = None
        print("BuildId tag not found on parent run.")
        print(f"Tags present: {parent_tags}")
    try:
        build_uri = parent_tags["BuildUri"]
    except KeyError:
        build_uri = None
        print("BuildUri tag not found on parent run.")
        print(f"Tags present: {parent_tags}")

    # Registration cascade: pass only the identifiers that are actually
    # available so register_aml_model keeps its defaults otherwise.
    if (model is not None):
        dataset_id = parent_tags["dataset_id"]
        if (build_id is None):
            register_aml_model(
                model_file,
                model_name,
                model_tags,
                exp,
                run_id,
                dataset_id)
        elif (build_uri is None):
            register_aml_model(
                model_file,
                model_name,
                model_tags,
                exp,
                run_id,
                dataset_id,
                build_id)
        else:
            register_aml_model(
                model_file,
                model_name,
                model_tags,
                exp,
                run_id,
                dataset_id,
                build_id,
                build_uri)
    else:
        print("Model not found. Skipping model registration.")
        sys.exit(0)
Skipping model registration.") 158 | sys.exit(0) 159 | 160 | 161 | def model_already_registered(model_name, exp, run_id): 162 | model_list = AMLModel.list(exp.workspace, name=model_name, run_id=run_id) 163 | if len(model_list) >= 1: 164 | e = ("Model name:", model_name, "in workspace", 165 | exp.workspace, "with run_id ", run_id, "is already registered.") 166 | print(e) 167 | raise Exception(e) 168 | else: 169 | print("Model is not registered for this run.") 170 | 171 | 172 | def register_aml_model( 173 | model_path, 174 | model_name, 175 | model_tags, 176 | exp, 177 | run_id, 178 | dataset_id, 179 | build_id: str = 'none', 180 | build_uri=None 181 | ): 182 | try: 183 | tagsValue = {"area": "diabetes_regression", 184 | "run_id": run_id, 185 | "experiment_name": exp.name} 186 | tagsValue.update(model_tags) 187 | if (build_id != 'none'): 188 | model_already_registered(model_name, exp, run_id) 189 | tagsValue["BuildId"] = build_id 190 | if (build_uri is not None): 191 | tagsValue["BuildUri"] = build_uri 192 | 193 | model = AMLModel.register( 194 | workspace=exp.workspace, 195 | model_name=model_name, 196 | model_path=model_path, 197 | tags=tagsValue, 198 | datasets=[('training data', 199 | Dataset.get_by_id(exp.workspace, dataset_id))]) 200 | os.chdir("..") 201 | print( 202 | "Model registered: {} \nModel Description: {} " 203 | "\nModel Version: {}".format( 204 | model.name, model.description, model.version 205 | ) 206 | ) 207 | except Exception: 208 | traceback.print_exc(limit=None, file=None, chain=True) 209 | print("Model registration failed") 210 | raise 211 | 212 | 213 | if __name__ == '__main__': 214 | main() 215 | -------------------------------------------------------------------------------- /docs/canary_ab_deployment.md: -------------------------------------------------------------------------------- 1 | # Model deployment to AKS cluster with Canary deployment 2 | 3 | [![Build 
Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/microsoft.MLOpsPython-Canary?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=133&branchName=master) 4 | 5 | If your target deployment environment is a Kubernetes cluster and you want to implement [Canary and/or A/B testing deployment strategies](http://adfpractice-fedor.blogspot.com/2019/04/deployment-strategies-with-kubernetes.html) you can follow this sample guide. 6 | 7 | - [Prerequisites](#prerequisites) 8 | - [Install Istio on a K8s cluster](#install-istio-on-a-k8s-cluster) 9 | - [Set up variables](#set-up-variables) 10 | - [Configure a pipeline to build and deploy a scoring Image](#configure-a-pipeline-to-build-and-deploy-a-scoring-image) 11 | - [Build a new Scoring Image](#build-a-new-scoring-image) 12 | 13 | ## Prerequisites 14 | 15 | Before continuing with this guide, you will need: 16 | 17 | * An [Azure Kubernetes Service (AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service) cluster 18 | * This does **not** have to be the same cluster as the example in [Getting Started: Deploy the model to Azure Kubernetes Service](/docs/getting_started.md#deploy-the-model-to-azure-kubernetes-service) 19 | * The cluster does not have to be connected to Azure Machine Learning. 20 | * If you want to deploy a new cluster, see [Quickstart: Deploy an Azure Kubernetes Service cluster using the Azure CLI](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough) 21 | * An Azure Container Registry instance that is authenticated with your Azure Kubernetes Service cluster. 22 | * The chart you will deploy is assuming you are authenticated using a service principal. 23 | * See [Authenticate with Azure Container Registry from Azure Kubernetes Service](https://docs.microsoft.com/en-us/azure/aks/cluster-container-registry-integration#configure-acr-integration-for-existing-aks-clusters) for an authentication guide. 
24 | * In Azure DevOps, a service connection to your Kubernetes cluster. 25 | * If you do not currently have a namespace, create one named 'abtesting'. 26 | 27 | ## Install Istio on a K8s cluster 28 | 29 | You'll be using the [Istio](https://istio.io) service mesh implementation to control traffic routing between model versions. Follow the instructions at [Install and use Istio in Azure Kubernetes Service (AKS)](https://docs.microsoft.com/azure/aks/servicemesh-istio-install?pivots=client-operating-system-linux). 30 | 31 | After Istio is installed, figure out the Istio gateway endpoint on your K8s cluster: 32 | 33 | ```bash 34 | GATEWAY_IP=$(kubectl get svc istio-ingressgateway -n istio-system -o jsonpath='{.status.loadBalancer.ingress[0].ip}') 35 | ``` 36 | 37 | You don't need to create any Istio resources (e.g. Gateway or VirtualService) at this point. It will be handled by the AzDo pipeline that builds and deploys a scoring image. 38 | 39 | ## Set up variables 40 | 41 | There are some extra variables that you need to set up in ***devopsforai-aml-vg*** variable group (see [getting started](./getting_started.md)): 42 | 43 | | Variable Name | Suggested Value | Short Description | 44 | |---------------------------|-----------------------|-----------------------------------------------------------| 45 | | K8S_AB_SERVICE_CONNECTION | mlops-aks | Name of the service connection to your Kubernetes cluster | 46 | | K8S_AB_NAMESPACE | abtesting | Kubernetes namespace for model deployment | 47 | | IMAGE_REPO_NAME | [Your ACR's DNS name] | Image repository name (e.g. mlopspyciamlcr.azurecr.io) | 48 | 49 | ## Configure a pipeline to build and deploy a scoring Image 50 | 51 | Import and run the [abtest.yml](./.pipelines/abtest.yml) multistage deployment pipeline. 
52 | 53 | After the pipeline completes successfully, you will see a registered Docker image in the ACR repository attached to the Azure ML Service: 54 | 55 | ![scoring image](./images/scoring_image.png) 56 | 57 | The pipeline creates Istio Gateway and VirtualService and deploys the scoring image to the Kubernetes cluster. 58 | 59 | ```bash 60 | kubectl get deployments --namespace abtesting 61 | NAME READY UP-TO-DATE AVAILABLE AGE 62 | model-green 1/1 1 1 19h 63 | ``` 64 | 65 | ## Build a new Scoring Image 66 | 67 | Change value of the ***SCORE_SCRIPT*** variable in the [abtest.yml](./.pipelines/abtest.yml) to point to ***scoring/scoreA.py*** and merge it to the master branch. 68 | 69 | **Note:** ***scoreA.py*** and ***scoreB.py*** files used in this tutorial are just mockups returning either "New Model A" or "New Model B" respectively. They are used to demonstrate the concept of testing two scoring images with different models or scoring code. In real life you would implement a scoring file similar to [score.py](./../code/scoring/score.py) (see the [Getting Started](./getting_started.md) guide). 70 | 71 | It will automatically trigger the pipeline and deploy a new scoring image with the following stages implementing ***Canary*** deployment strategy: 72 | 73 | | Stage | Green Weight | Blue Weight | Description | 74 | |------------|--------------|-------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------| 75 | | Blue_0 | 100 | 0 | New image (blue) is deployed.
But all traffic (100%) is still routed to the old (green) image. | 76 | | Blue_50 | 50 | 50 | Traffic is split between old (green) and new (blue) images 50/50. | 77 | | Blue_100 | 0 | 100 | All traffic (100%) is routed to the blue image. | 78 | | Blue_Green | 0 | 100 | Old green image is removed. The new blue image is copied as green.
Blue and Green images are equal.
All traffic (100%) is routed to the blue image. | 79 | | Green_100 | 100 | 0 | All traffic (100%) is routed to the green image.
The blue image is removed. | 80 | 81 | **Note:** The pipeline performs the rollout without any pausing. You may want to configure [Approvals and Checks](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals?view=azure-devops&tabs=check-pass) for the stages on your environment for better experience of the model testing. The environment ***abtestenv*** will be added automatically to your AzDo project after the first pipeline run. 82 | 83 | At each stage you can verify how the traffic is routed sending requests to $GATEWAY_IP/score with ***Postman*** or with ***curl***: 84 | 85 | ```bash 86 | curl $GATEWAY_IP/score 87 | ``` 88 | 89 | You can also emulate a simple load test on the gateway with the ***load_test.sh***: 90 | 91 | ```bash 92 | ./charts/load_test.sh 10 $GATEWAY_IP/score 93 | ``` 94 | 95 | The command above sends 10 requests to the gateway. So if the pipeline has completed stage Blue_50, the result will look like this: 96 | 97 | ```bash 98 | "New Model A" 99 | "New Model A" 100 | "New Model A" 101 | "New Model B" 102 | "New Model A" 103 | "New Model B" 104 | "New Model B" 105 | "New Model A" 106 | "New Model A" 107 | "New Model A" 108 | ``` 109 | 110 | Regardless of the blue/green weight values set on the cluster, you can perform ***A/B testing*** and send requests directly to either blue or green images: 111 | 112 | ```bash 113 | curl --header "x-api-version: blue" $GATEWAY_IP/score 114 | curl --header "x-api-version: green" $GATEWAY_IP/score 115 | ``` 116 | 117 | or with a load_test.sh script: 118 | 119 | ```bash 120 | ./charts/load_test.sh 10 $GATEWAY_IP/score blue 121 | ./charts/load_test.sh 10 $GATEWAY_IP/score green 122 | ``` 123 | 124 | In this case the Istio Virtual Service analyzes the request header and routes the traffic directly to the specified model version. 
125 | -------------------------------------------------------------------------------- /docs/code_description.md: -------------------------------------------------------------------------------- 1 | ## Repo Details 2 | 3 | ### Directory Structure 4 | 5 | High level directory structure for this repository: 6 | 7 | ```bash 8 | ├── .pipelines <- Azure DevOps YAML pipelines for CI, PR and model training and deployment. 9 | ├── bootstrap <- Python script to initialize this repository with a custom project name. 10 | ├── charts <- Helm charts to deploy resources on Azure Kubernetes Service(AKS). 11 | ├── data <- Initial set of data to train and evaluate model. Not for use to store data. 12 | ├── diabetes_regression <- The top-level folder for the ML project. 13 | │ ├── evaluate <- Python script to evaluate trained ML model. 14 | │ ├── register <- Python script to register trained ML model with Azure Machine Learning Service. 15 | │ ├── scoring <- Python score.py to deploy trained ML model. 16 | │ ├── training <- Python script to train ML model. 17 | │ ├── R <- R script to train R based ML model. 18 | │ ├── util <- Python script for various utility operations specific to this ML project. 19 | ├── docs <- Extensive markdown documentation for entire project. 20 | ├── environment_setup <- The top-level folder for everything related to infrastructure. 21 | │ ├── arm-templates <- Azure Resource Manager(ARM) templates to build infrastructure needed for this project. 22 | │ ├── tf-templates <- Terraform templates to build infrastructure needed for this project. 23 | ├── experimentation <- Jupyter notebooks with ML experimentation code. 24 | ├── ml_service <- The top-level folder for all Azure Machine Learning resources. 25 | │ ├── pipelines <- Python script that builds Azure Machine Learning pipelines. 26 | │ ├── util <- Python script for various utility operations specific to Azure Machine Learning. 
27 | ├── .env.example <- Example .env file with environment for local development experience. 28 | ├── .gitignore <- A gitignore file specifies intentionally un-tracked files that Git should ignore. 29 | ├── LICENSE <- License document for this project. 30 | ├── README.md <- The top-level README for developers using this project. 31 | ``` 32 | 33 | The repository provides a template with a folder structure suitable for maintaining multiple ML projects. There are common folders such as ***.pipelines***, ***environment_setup***, ***ml_service*** and folders containing the code base for each ML project. This repository contains a single sample ML project in the ***diabetes_regression*** folder. This folder is going to be automatically renamed to your project name if you follow the [bootstrap procedure](../bootstrap/README.md). 34 | 35 | ### Environment Setup 36 | 37 | - `environment_setup/install_requirements.sh` : This script prepares a local conda environment i.e. installs the Azure ML SDK and the packages specified in environment definitions. 38 | 39 | - `environment_setup/iac-*-arm.yml, arm-templates` : Infrastructure as Code pipelines to create required resources using ARM, along with corresponding arm-templates. Infrastructure as Code can be deployed with this template or with the Terraform template. 40 | 41 | - `environment_setup/iac-*-tf.yml, tf-templates` : Infrastructure as Code pipelines to create required resources using Terraform, along with corresponding tf-templates. Infrastructure as Code can be deployed with this template or with the ARM template. 42 | 43 | - `environment_setup/iac-remove-environment.yml` : Infrastructure as Code pipelines to delete the created required resources. 44 | 45 | - `environment_setup/Dockerfile` : Dockerfile of a build agent containing Python 3.6 and all required packages. 
46 | 47 | - `environment_setup/docker-image-pipeline.yml` : An AzDo pipeline for building and pushing [microsoft/mlopspython](https://hub.docker.com/_/microsoft-mlops-python) image. 48 | 49 | ### Pipelines 50 | 51 | - `.pipelines/abtest.yml` : a pipeline demonstrating [Canary deployment strategy](./docs/canary_ab_deployment.md). 52 | - `.pipelines/code-quality-template.yml` : a pipeline template used by the CI and PR pipelines. It contains steps performing linting, data and unit testing. 53 | - `.pipelines/diabetes_regression-ci-image.yml` : a pipeline building a scoring image for the diabetes regression model. 54 | - `.pipelines/diabetes_regression-ci.yml` : a pipeline triggered when the code is merged into **master**. It performs linting, data integrity testing, unit testing, building and publishing an ML pipeline. 55 | - `.pipelines/diabetes_regression-cd.yml` : a pipeline triggered when the code is merged into **master** and the `.pipelines/diabetes_regression-ci.yml` completes. It performs linting, data integrity testing, unit testing, building and publishing an ML pipeline. 56 | - `.pipelines/diabetes_regression-package-model-template.yml` : a pipeline triggered when the code is merged into **master**. It deploys the registered model to a target. 57 | - `.pipelines/diabetes_regression-get-model-id-artifact-template.yml` : a pipeline template used by the `.pipelines/diabetes_regression-cd.yml` pipeline. It takes the model metadata artifact published by the previous pipeline and gets the model ID. 58 | - `.pipelines/diabetes_regression-publish-model-artifact-template.yml` : a pipeline template used by the `.pipelines/diabetes_regression-ci.yml` pipeline. It finds out if a new model was registered and publishes a pipeline artifact containing the model metadata. 59 | - `.pipelines/helm-*.yml` : pipeline templates used by the `.pipelines/abtest.yml` pipeline. 
60 | - `.pipelines/pr.yml` : a pipeline triggered when a **pull request** to the **master** branch is created. It performs linting, data integrity testing and unit testing only. 61 | 62 | ### ML Services 63 | 64 | - `ml_service/pipelines/diabetes_regression_build_train_pipeline.py` : builds and publishes an ML training pipeline. It uses Python on ML Compute. 65 | - `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py` : builds and publishes an ML training pipeline. It uses R on ML Compute. 66 | - `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` : builds and publishes an ML training pipeline. It uses R on Databricks Compute. 67 | - `ml_service/pipelines/run_train_pipeline.py` : invokes a published ML training pipeline (Python on ML Compute) via REST API. 68 | - `ml_service/util` : contains common utility functions used to build and publish an ML training pipeline. 69 | 70 | ### Environment Definitions 71 | 72 | - `diabetes_regression/conda_dependencies.yml` : Conda environment definition for the environment used for both training and scoring (Docker image in which train.py and score.py are run). 73 | - `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment. 74 | 75 | ### Training Step 76 | 77 | - `diabetes_regression/training/train_aml.py`: a training step of an ML training pipeline. 78 | - `diabetes_regression/training/train.py` : ML functionality called by train_aml.py 79 | - `diabetes_regression/training/R/r_train.r` : training a model with R basing on a sample dataset (weight_data.csv). 
80 | - `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute 81 | - `diabetes_regression/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute 82 | - `diabetes_regression/training/R/weight_data.csv` : a sample dataset used by R script (r_train.r) to train a model 83 | - `diabetes_regression/training/R/test_train.py` : a unit test for the training script(s) 84 | 85 | ### Evaluation Step 86 | 87 | - `diabetes_regression/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one. 88 | 89 | ### Registering Step 90 | 91 | - `diabetes_regression/evaluate/register_model.py` : registers a new trained model if evaluation shows the new model is more performant than the previous one. 92 | 93 | ### Scoring 94 | 95 | - `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment. 96 | - `diabetes_regression/scoring/inference_config.yml`, `deployment_config_aci.yml`, `deployment_config_aks.yml` : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets. 97 | - `diabetes_regression/scoring/scoreA.py`, `diabetes_regression/scoring/scoreB.py` : simplified scoring files for the [Canary deployment sample](./docs/canary_ab_deployment.md). 
98 | -------------------------------------------------------------------------------- /docs/custom_model.md: -------------------------------------------------------------------------------- 1 | # Bring your own code with the MLOpsPython repository template 2 | 3 | This document provides steps to follow when using this repository as a template to train models and deploy the models with real-time inference in Azure ML with your own scripts and data. 4 | 5 | 1. Follow the MLOpsPython [Getting Started](getting_started.md) guide 6 | 1. Bootstrap the project 7 | 1. Configure training data 8 | 1. [If necessary] Convert your ML experimental code into production ready code 9 | 1. Replace the training code 10 | 1. [Optional] Update the evaluation code 11 | 1. Customize the build agent environment 12 | 1. [If appropriate] Replace the score code 13 | 1. [If appropriate] Configure batch scoring data 14 | 15 | ## Follow the Getting Started guide 16 | 17 | Follow the [Getting Started](getting_started.md) guide to set up the infrastructure and pipelines to execute MLOpsPython. 18 | 19 | Take a look at the [Repo Details](code_description.md) document for a description of the structure of this repository. 20 | 21 | ## Bootstrap the project 22 | 23 | Bootstrapping will prepare the directory structure to be used for your project name which includes: 24 | 25 | * renaming files and folders from the base project name `diabetes_regression` to your project name 26 | * fixing imports and absolute path based on your project name 27 | * deleting and cleaning up some directories 28 | 29 | **Note:** Since the bootstrap script will rename the `diabetes_regression` folder to the project name of your choice, we'll refer to your project as `[project name]` when paths are involved. 30 | 31 | To bootstrap from the existing MLOpsPython repository: 32 | 33 | 1. Ensure Python 3 is installed locally 34 | 1. 
From a local copy of the code, run the `bootstrap.py` script in the `bootstrap` folder 35 | `python bootstrap.py -d [dirpath] -n [projectname]` 36 | * `[dirpath]` is the absolute path to the root of the directory where MLOpsPython is cloned 37 | * `[projectname]` is the name of your ML project 38 | 39 | # Configure Custom Training 40 | 41 | ## Configure training data 42 | 43 | The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. 44 | 45 | **Important** Convert the template to use your own Azure ML Dataset for model training via these steps: 46 | 47 | 1. [Create a Dataset](https://docs.microsoft.com/azure/machine-learning/how-to-create-register-datasets) in your Azure ML workspace 48 | 1. Update the `DATASET_NAME` and `DATASTORE_NAME` variables in `.pipelines/[project name]-variables-template.yml` 49 | 50 | ## Convert your ML experimental code into production ready code 51 | 52 | The MLOpsPython template creates an Azure Machine Learning (ML) pipeline that invokes a set of [Azure ML pipeline steps](https://docs.microsoft.com/python/api/azureml-pipeline-steps/azureml.pipeline.steps) (see `ml_service/pipelines/[project name]_build_train_pipeline.py`). If your experiment is currently in a Jupyter notebook, it will need to be refactored into scripts that can be run independently and dropped into the template which the existing Azure ML pipeline steps utilize. 53 | 54 | 1. Refactor your experiment code into scripts 55 | 1. [Recommended] Prepare unit tests 56 | 57 | Examples of all these scripts are provided in this repository. 58 | See the [Convert ML experimental code to production code tutorial](https://docs.microsoft.com/azure/machine-learning/tutorial-convert-ml-experiment-to-production) for a step by step guide and additional details. 59 | 60 | ## Replace training code 61 | 62 | The template contains three scripts in the `[project name]/training` folder. 
Update these scripts for your experiment code. 63 | 64 | * `train.py` contains the platform-agnostic logic required to do basic data preparation and train the model. This script can be invoked against a static data file for local development. 65 | * `train_aml.py` is the entry script for the ML pipeline step. It invokes the functions in `train.py` in an Azure ML context and adds logging. `train_aml.py` loads parameters for training from `[project name]/parameters.json` and passes them to the training function in `train.py`. If your experiment code can be refactored to match the function signatures in `train.py`, this file shouldn't need many changes. 66 | * `test_train.py` contains tests that guard against functional regressions in `train.py`. Remove this file if you have no tests for your own code. 67 | 68 | Add any dependencies required by training to `[project name]/conda_dependencies.yml]`. This file will be used to generate the environment that the pipeline steps will run in. 69 | 70 | ## Update evaluation code 71 | 72 | The MLOpsPython template uses the evaluate_model script to compare the performance of the newly trained model and the current production model based on Mean Squared Error. If the performance of the newly trained model is better than the current production model, then the pipelines continue. Otherwise, the pipelines are canceled. 73 | 74 | To keep the evaluation step, replace all instances of `mse` in `[project name]/evaluate/evaluate_model.py` with the metric that you want. 
75 | 76 | To disable the evaluation step, either: 77 | 78 | * set the DevOps pipeline variable `RUN_EVALUATION` to `false` 79 | * uncomment `RUN_EVALUATION` in `.pipelines/[project name]-variables-template.yml` and set the value to `false` 80 | 81 | ## Customize the build agent environment 82 | 83 | The DevOps pipeline definitions in the MLOpsPython template run several steps in a Docker container that contains the dependencies required to work through the Getting Started guide. If additional dependencies are required to run your unit tests or generate your Azure ML pipeline, there are a few options: 84 | 85 | * Add a pipeline step to install dependencies required by unit tests to `.pipelines/code-quality-template.yml`. Recommended if you only have a small number of test dependencies. 86 | * Create a new Docker image containing your dependencies. See [docs/custom_container.md](custom_container.md). Recommended if you have a larger number of dependencies, or if the overhead of installing additional dependencies on each run is too high. 87 | * Remove the container references from the pipeline definition files and run the pipelines on self hosted agents with dependencies pre-installed. 88 | 89 | # Configure Custom Scoring 90 | 91 | ## Replace score code 92 | 93 | For the model to provide real-time inference capabilities, the score code needs to be replaced. The MLOpsPython template uses the score code to deploy the model to do real-time scoring on ACI, AKS, or Web apps. 94 | 95 | If you want to keep scoring: 96 | 97 | 1. Update or replace `[project name]/scoring/score.py` 98 | 1. Add any dependencies required by scoring to `[project name]/conda_dependencies.yml` 99 | 1. 
Modify the test cases in the `ml_service/util/smoke_test_scoring_service.py` script to match the schema of the training features in your data 100 | 101 | # Configure Custom Batch Scoring 102 | 103 | ## Configure input and output data 104 | 105 | The batch scoring pipeline is configured to use the default datastore for input and output. It will use sample data for scoring. 106 | 107 | In order to configure your own input datastore and output datastores, you will need to specify an Azure Blob Storage Account and set up input and output containers. 108 | 109 | Configure the variables below in your variable group. 110 | 111 | **Note: The datastore storage resource, input/output containers, and scoring data is not created automatically. Make sure that you have manually provisioned these resources and placed your scoring data in your input container with the proper name.** 112 | 113 | 114 | | Variable Name | Suggested Value | Short description | 115 | | ------------------------ | ------------------------- | --------------------------------------------------------------------------------------------------------------------------- | 116 | | SCORING_DATASTORE_STORAGE_NAME | | [Azure Blob Storage Account](https://docs.microsoft.com/en-us/azure/storage/blobs/) name. | 117 | | SCORING_DATASTORE_ACCESS_KEY | | [Azure Storage Account Key](https://docs.microsoft.com/en-us/rest/api/storageservices/authorize-requests-to-azure-storage). You may want to consider linking this variable to Azure KeyVault to avoid storing the access key in plain text. | 118 | | SCORING_DATASTORE_INPUT_CONTAINER | | The name of the container for input data. Defaults to `input` if not set. | 119 | | SCORING_DATASTORE_OUTPUT_CONTAINER| | The name of the container for output data. Defaults to `output` if not set. | 120 | | SCORING_DATASTORE_INPUT_FILENAME | | The filename of the input data in your container Defaults to `diabetes_scoring_input.csv` if not set. 
| 121 | | SCORING_DATASET_NAME | | The AzureML Dataset name to use. Defaults to `diabetes_scoring_ds` if not set (optional). | 122 | | SCORING_DATASTORE_OUTPUT_FILENAME | | The filename to use for the output data. The pipeline will create this file. Defaults to `diabetes_scoring_output.csv` if not set (optional). | 123 | 124 | --------------------------------------------------------------------------------