├── .github ├── actions │ ├── aml-endpoint-deploy │ │ └── action.yaml │ ├── aml-endpoint-swap │ │ └── action.yaml │ ├── aml-endpoint-test │ │ └── action.yaml │ └── aml-job-create │ │ └── action.yaml └── workflows │ ├── workshop_cd.yml │ ├── workshop_ci.yml │ └── workshop_unit_test.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MLOps-ADO-ADB ├── .azure_pipelines │ ├── cd.yml │ ├── ci.yml │ ├── move_model.yml │ ├── templates │ │ ├── aml-batch-score-deploy │ │ │ └── step.yml │ │ ├── aml-endpoint-deploy │ │ │ └── step.yml │ │ ├── aml-job-create │ │ │ └── step.yml │ │ ├── aml-model-compare │ │ │ ├── evaluate.py │ │ │ └── step.yml │ │ ├── aml-model-register │ │ │ ├── step.yml │ │ │ └── step2.yml │ │ └── devops-create-pr │ │ │ └── step.yml │ ├── variables.yml │ └── workshop_unit_test.yml ├── .github │ ├── actions │ │ ├── aml-endpoint-deploy │ │ │ └── action.yaml │ │ ├── aml-endpoint-swap │ │ │ └── action.yaml │ │ ├── aml-endpoint-test │ │ │ └── action.yaml │ │ └── aml-job-create │ │ │ └── action.yaml │ └── workflows │ │ ├── workshop_cd.yml │ │ ├── workshop_ci.yml │ │ └── workshop_unit_test.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE.txt ├── README.md ├── SECURITY.md ├── SUPPORT.md └── src │ └── workshop │ ├── README.md │ ├── documents │ ├── images │ │ ├── arm000.png │ │ ├── arm001.png │ │ ├── arm002.png │ │ ├── arm100.png │ │ ├── cicd.png │ │ ├── cloudshell-accept.png │ │ ├── cloudshell-firstlaunch.png │ │ ├── cloudshell.png │ │ ├── cloudshell2.png │ │ ├── deploy-to-azure.svg │ │ ├── image-10.png │ │ ├── image-11.png │ │ ├── image-12.png │ │ ├── image-13.png │ │ ├── image-14.png │ │ ├── image-15.png │ │ ├── image-16.png │ │ ├── image-8.png │ │ ├── image-9.png │ │ ├── monolithic_modular.png │ │ ├── part3cicd.png │ │ ├── part_0_adb_add_sp.png │ │ ├── part_0_ado_add_sp.png │ │ ├── part_0_ado_pipe1.png │ │ ├── part_0_ado_pipe2.png │ │ ├── part_0_ado_pipe3.png │ │ ├── part_0_ado_pipe4.png │ │ ├── part_0_ado_pipe5.png │ │ ├── part_0_integration_policies.png │ │ ├── part_0_main_policies.png │ │ ├── part_0_set_model_permissions.png │ │ ├── part_1_adb_create_branch.png │ │ ├── part_1_adb_file_exp_dev.png │ │ ├── part_1_branch_ui_integration.png │ │ ├── part_1_db_repo_file_explorer.png │ │ ├── part_1_git_options_from_adb_repo.png │ │ ├── part_1_model_registry.png │ │ ├── part_2_aad_login.png │ │ ├── part_2_ado_manual_trigger.png │ │ ├── part_2_azpipe_run_nb.png │ │ ├── part_2_pipe_adb_step.png │ │ ├── part_2_pipe_job.png │ │ ├── part_2_run_job.png │ │ ├── part_3_adb_repo_commit_push.png │ │ ├── part_3_adb_repo_link_in_nb.png │ │ ├── part_3_unit_test_triggers.png │ │ ├── part_4_adb_training_workflow.png │ │ ├── run_mlopsworkshop_azcli000.png │ │ ├── run_mlopsworkshop_azcli001.png │ │ ├── run_mlopsworkshop_azcli002.png │ │ ├── run_mlopsworkshop_azcli003.png │ │ ├── run_mlopsworkshop_azcli004.png │ │ ├── run_mlopsworkshop_azcli005.png │ │ ├── run_mlopsworkshop_azcli006.png │ │ ├── run_mlopsworkshop_azcli007.png │ │ ├── run_mlopsworkshop_azcli008.png │ │ ├── run_mlopsworkshop_azcli009.png │ │ ├── run_mlopsworkshop_azcli010.png │ │ ├── training_pipeline.png │ │ └── video_img.png │ ├── part_0.md │ ├── part_1.md │ ├── part_2.md │ ├── part_3.md │ ├── part_4.md │ ├── part_5.md │ └── part_tips.md │ └── notebooks │ ├── mlflow-end-to-end-example.ipynb │ ├── part_0_create_datasets.ipynb │ ├── part_1_1_data_prep.ipynb │ ├── part_1_2_training.ipynb │ ├── part_1_3_evaluating.ipynb │ ├── part_1_4_scoring.ipynb │ └── part_4_new_training_code.ipynb ├── README.md ├── SECURITY.md ├── SUPPORT.md └── src ├── 
__init__.py ├── active_learning_cv ├── README.md └── data │ └── images │ ├── comparision_table.png │ ├── functional_flow.png │ ├── ls_rs_13.png │ ├── ls_rs_14.png │ ├── ls_rs_es_smu_13.png │ └── technical_design.png └── workshop ├── README.md ├── conda-local.yml ├── core ├── data_engineering │ ├── conda_feature_engineering.yml │ ├── feature_engineering.py │ └── feature_engineering.yml ├── evaluating │ ├── conda_ml_evaluating.yml │ ├── ml_evaluating.py │ └── ml_evaluating.yml ├── pipelines │ ├── adf │ │ └── adf_pipeline.json │ ├── batch_scoring_pipeline.yml │ ├── data_engineering_comp.yml │ └── training_pipeline.yml ├── scoring │ ├── batch_scoring │ │ ├── batch_score.py │ │ ├── conda.yml │ │ └── data_engineering.py │ ├── conda.yml │ ├── deployment.yml │ ├── endpoint.yml │ ├── score.py │ └── scoring_test_request.json └── training │ ├── conda_ml_training.yml │ ├── ml_training.py │ └── ml_training.yml ├── data ├── create_datasets.py └── linear_regression.joblib ├── documents ├── EZMLOps_introduction.pptx ├── IaC │ ├── createSP.azcli │ ├── iac_EZ_MLOps.json │ ├── iac_cc.yml │ ├── iac_ci.yml │ └── iac_mlopsworkshop.azcli ├── images │ ├── arm000.png │ ├── arm001.png │ ├── arm002.png │ ├── arm100.png │ ├── cicd.png │ ├── cloudshell-accept.png │ ├── cloudshell-firstlaunch.png │ ├── cloudshell.png │ ├── cloudshell2.png │ ├── deploy-to-azure.svg │ ├── github4000.png │ ├── github4001.png │ ├── github4002.png │ ├── github4003.png │ ├── github4004.png │ ├── github4005.png │ ├── github4006.png │ ├── github4007.png │ ├── github4008.png │ ├── monolithic_modular.png │ ├── part3cicd.png │ ├── run_mlopsworkshop_azcli000.png │ ├── run_mlopsworkshop_azcli001.png │ ├── run_mlopsworkshop_azcli002.png │ ├── run_mlopsworkshop_azcli003.png │ ├── run_mlopsworkshop_azcli004.png │ ├── run_mlopsworkshop_azcli005.png │ ├── run_mlopsworkshop_azcli006.png │ ├── run_mlopsworkshop_azcli007.png │ ├── run_mlopsworkshop_azcli008.png │ ├── run_mlopsworkshop_azcli009.png │ ├── run_mlopsworkshop_azcli010.png │ ├── training_pipeline.png │ └── video_img.png ├── part_0.md ├── part_1.md ├── part_2.md ├── part_3.md ├── part_4.md ├── part_5.md └── part_tips.md ├── infra └── conda.yml ├── notebooks └── taxi-tutorial.ipynb └── requirements-local.txt /.github/actions/aml-endpoint-deploy/action.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy AzureML managed online endpoint 2 | description: 'Deploys a model endpoint in Azure Machine Learning Services along with all the deployments it contains. Logs are collected and uploaded.' 3 | 4 | inputs: 5 | resourceGroup: 6 | description: 'Name of the resource group where the workspace is placed.' 7 | required: true 8 | workspaceName: 9 | description: 'Name of the workspace to work against.' 10 | required: true 11 | endpointFile: 12 | description: 'Path to the endpoint YAML file.' 13 | required: true 14 | deploymentFile: 15 | description: 'Path to the deployment YAML file for the given endpoint.' 16 | required: true 17 | modelVersion: 18 | description: 'Model version you want to deploy. Supports either a specific version number, or "latest". If not specified, the model version from the deployment file is used.' 19 | required: false 20 | default: '' 21 | updateIfExists: 22 | description: 'If endpoint exists, update it instead of creating a new one.'
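# Illustrative only: endpointFile is expected to point at an AML online endpoint spec, such as
# src/workshop/core/scoring/endpoint.yml in this repo. A minimal sketch (hypothetical name; the
# action only requires that `.name` be readable via yq) might look like:
#   name: taxi-fare-endpoint
#   auth_mode: key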
23 | required: false 24 | default: 'false' 25 | outputs: 26 | deployedVersion: 27 | description: 'Deployed version of the model' 28 | value: ${{ steps.deployment.outputs.deployedVersion }} 29 | 30 | runs: 31 | using: "composite" 32 | steps: 33 | - name: Deploy endpoint 34 | id: deployment 35 | shell: bash 36 | run: | 37 | set -e 38 | az configure --defaults workspace=${{ inputs.workspaceName }} group=${{ inputs.resourceGroup }} 39 | 40 | ENDPOINT_FILE=${{ inputs.endpointFile }} 41 | DEPLOYMENT_FILE=${{ inputs.deploymentFile }} 42 | 43 | ENDPOINT_NAME=$(yq -r ".name" $ENDPOINT_FILE) 44 | echo "Endpoint name: $ENDPOINT_NAME" 45 | 46 | # Removing traffic if present in endpoint config as we'll manage traffic setup as part of the safe rollout 47 | echo "Rewriting endpoint file without traffic" 48 | yq -y -i "del(.traffic)" $ENDPOINT_FILE 49 | 50 | # Create or update endpoint 51 | { 52 | echo "Creating endpoint with name: $ENDPOINT_NAME" && 53 | az ml online-endpoint create -f $ENDPOINT_FILE 54 | } || { 55 | echo "Endpoint $ENDPOINT_NAME already exists" 56 | if [ ${{ inputs.updateIfExists }} == 'true' ]; then 57 | echo "Updating endpoint with name: $ENDPOINT_NAME" && 58 | az ml online-endpoint update -f $ENDPOINT_FILE 59 | else 60 | echo "Skipping update of endpoint with name: $ENDPOINT_NAME" 61 | fi 62 | } 63 | 64 | # Identify which slot should be used to stage this deployment based on current traffic 65 | echo "Reading endpoint traffic to identify target staging deployment slot" 66 | az ml online-endpoint show -n $ENDPOINT_NAME --query "traffic" -o yaml > _endpoint_traffic.yml 67 | echo "Endpoint traffic:" 68 | cat _endpoint_traffic.yml 69 | GREEN_TRAFFIC=$(yq .green _endpoint_traffic.yml) 70 | BLUE_TRAFFIC=$(yq .blue _endpoint_traffic.yml) 71 | if [[ $GREEN_TRAFFIC == null || $GREEN_TRAFFIC == 0 ]]; then 72 | STAGING_DEPLOYMENT_NAME='green'; 73 | else 74 | if [[ $BLUE_TRAFFIC == null || $BLUE_TRAFFIC == 0 ]]; then 75 | STAGING_DEPLOYMENT_NAME='blue'; 76 | else 77 | echo "::error::No staging slots available for endpoint $ENDPOINT_NAME. 
One of the green/blue slots needs to have 0% traffic."; 78 | exit 1; 79 | fi 80 | fi 81 | echo "Selected staging deployment name: $STAGING_DEPLOYMENT_NAME" 82 | 83 | # Updating deployment file to set the deployment name based on the staging slot selected above 84 | echo "Updating deployment name to $STAGING_DEPLOYMENT_NAME" 85 | if [[ $STAGING_DEPLOYMENT_NAME == "blue" ]]; then 86 | yq -y -i '.name= "blue"' $DEPLOYMENT_FILE; 87 | else 88 | yq -y -i '.name= "green"' $DEPLOYMENT_FILE; 89 | fi 90 | 91 | # Overwrite the model version set in the deployment file with a specific version or 'latest' if specified in the workflow 92 | DEPLOYMENT_MODEL=$(yq -r ".model" $DEPLOYMENT_FILE | cut -d: -f2) 93 | DEPLOYMENT_MODEL_VERSION=$(yq -r ".model" $DEPLOYMENT_FILE | cut -d: -f3) 94 | if [ -z "${{ inputs.modelVersion}}" ]; then 95 | TARGET_MODEL_VERSION=$DEPLOYMENT_MODEL_VERSION 96 | else 97 | echo "Model being targeted is being overwritten with version ${{ inputs.modelVersion}}" 98 | TARGET_MODEL_VERSION=${{ inputs.modelVersion}} 99 | fi 100 | if [[ "$TARGET_MODEL_VERSION" == "latest" ]]; then 101 | echo "Identifying latest version of the model $DEPLOYMENT_MODEL" 102 | TARGET_MODEL_VERSION=$(az ml model list --name $DEPLOYMENT_MODEL | jq -r '.[0].version') 103 | echo "Latest version of model $DEPLOYMENT_MODEL is $TARGET_MODEL_VERSION" 104 | fi 105 | if [[ $TARGET_MODEL_VERSION != $DEPLOYMENT_MODEL_VERSION ]]; then 106 | echo "Updating deployment file with model version: $TARGET_MODEL_VERSION" 107 | sed -i 's/:'$DEPLOYMENT_MODEL_VERSION'/:'$TARGET_MODEL_VERSION'/' $DEPLOYMENT_FILE 108 | fi 109 | echo "::set-output name=deployedVersion::$TARGET_MODEL_VERSION" 110 | 111 | # Create deployment 112 | echo "Creating deployment with name: $ENDPOINT_NAME/$STAGING_DEPLOYMENT_NAME" 113 | az ml online-deployment create -f $DEPLOYMENT_FILE --only-show-errors --set tags.git_commit=${GITHUB_SHA} 114 | echo "Deployment completed" 115 | 116 | # Saving logs 117 | echo "Acquiring logs for deployment with name: $ENDPOINT_NAME/$STAGING_DEPLOYMENT_NAME" 118 | mkdir -p logs 119 | az ml online-deployment get-logs --name $STAGING_DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME >> logs/${ENDPOINT_NAME}_${STAGING_DEPLOYMENT_NAME}.log 120 | 121 | - name: Upload deployment logs 122 | uses: actions/upload-artifact@v2 123 | if: ${{ (failure() || success()) }} 124 | with: 125 | name: deployment-logs 126 | path: logs/* -------------------------------------------------------------------------------- /.github/actions/aml-endpoint-swap/action.yaml: -------------------------------------------------------------------------------- 1 | name: Swap AzureML managed online endpoint deployments 2 | description: 'Swaps green/blue deployments of an Azure ML endpoint by switching traffic between endpoint deployments.' 3 | 4 | inputs: 5 | resourceGroup: 6 | description: 'Name of the resource group where the workspace is placed.' 7 | required: true 8 | workspaceName: 9 | description: 'Name of the workspace to work against.' 10 | required: true 11 | endpointFile: 12 | description: 'Path to the endpoint YAML file. Wildcard paths are supported which means that all matched endpoints will be deployed.'
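# Illustrative only: the "swap" below is purely traffic-based rather than a literal slot exchange.
# Promoting a staged green deployment by hand would look like (hypothetical endpoint name):
#   az ml online-endpoint update -n taxi-fare-endpoint --traffic "green=100 blue=0"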
13 | required: true 14 | 15 | runs: 16 | using: "composite" 17 | steps: 18 | - name: Swap endpoint deployments 19 | id: swap-deployments 20 | shell: bash 21 | run: | 22 | set -e 23 | az configure --defaults workspace=${{ inputs.workspaceName }} group=${{ inputs.resourceGroup }} 24 | 25 | ENDPOINT_FILE=${{ inputs.endpointFile }} 26 | ENDPOINT_NAME=$(yq -r ".name" $ENDPOINT_FILE) 27 | echo "ENDPOINT_FILE: $ENDPOINT_FILE" 28 | echo "ENDPOINT_NAME: $ENDPOINT_NAME" 29 | 30 | echo "Reading endpoint traffic to figure out which deployment is staging/production" 31 | az ml online-endpoint show -n $ENDPOINT_NAME --query "traffic" -o yaml > endpoint_traffic.yml 32 | echo "Endpoint traffic:" 33 | cat endpoint_traffic.yml 34 | GREEN_TRAFFIC=$(yq .green endpoint_traffic.yml) 35 | BLUE_TRAFFIC=$(yq .blue endpoint_traffic.yml) 36 | 37 | if [ $GREEN_TRAFFIC == null ]; then 38 | if [ $BLUE_TRAFFIC == null ]; then 39 | echo "::error::No deployment slots available for endpoint $ENDPOINT_NAME. Nothing to swap."; 40 | exit 1; 41 | else 42 | echo "Setting blue traffic to 100%" 43 | az ml online-endpoint update -n $ENDPOINT_NAME --traffic "blue=100" 44 | fi 45 | else 46 | if [ $BLUE_TRAFFIC == null ]; then 47 | echo "Setting green traffic to 100%" 48 | az ml online-endpoint update -n $ENDPOINT_NAME --traffic "green=100" 49 | else 50 | if [ $GREEN_TRAFFIC == 0 ]; then 51 | echo "Setting traffic to: green=100 blue=0" 52 | az ml online-endpoint update -n $ENDPOINT_NAME --traffic "green=100 blue=0" 53 | else 54 | echo "Setting traffic to: green=0 blue=100" 55 | az ml online-endpoint update -n $ENDPOINT_NAME --traffic "green=0 blue=100" 56 | fi 57 | fi 58 | fi -------------------------------------------------------------------------------- /.github/actions/aml-endpoint-test/action.yaml: -------------------------------------------------------------------------------- 1 | name: Test AzureML managed online endpoint deployment (0% traffic deployment) 2 | description: 'Finds 0% traffic deployment of an Azure ML endpoint and tests it.' 3 | 4 | inputs: 5 | resourceGroup: 6 | description: 'Name of the resource group where the workspace is placed.' 7 | required: true 8 | workspaceName: 9 | description: 'Name of the workspace to work against.' 10 | required: true 11 | endpointFile: 12 | description: 'Path to the endpoint YAML file. Wildcard paths are supported which means that all matched endpoints will be deployed.' 13 | required: true 14 | requestFile: 15 | description: 'Name of the json test request file.' 
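# Illustrative only: the request file is passed straight to `az ml online-endpoint invoke`, so its
# JSON shape depends entirely on the scoring script. In this repo it is
# src/workshop/core/scoring/scoring_test_request.json (see workshop_cd.yml); a sketch for a
# tabular model with hypothetical columns could be:
#   {"data": [[1.5, 3.2, 0.7]]}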
16 | required: true 17 | 18 | runs: 19 | using: "composite" 20 | steps: 21 | - name: Test endpoint deployments 22 | id: test-deployment 23 | shell: bash 24 | run: | 25 | set -e 26 | az configure --defaults workspace=${{ inputs.workspaceName }} group=${{ inputs.resourceGroup }} 27 | 28 | ENDPOINT_FILE=${{ inputs.endpointFile }} 29 | ENDPOINT_NAME=$(yq -r ".name" $ENDPOINT_FILE) 30 | echo "ENDPOINT_FILE: $ENDPOINT_FILE" 31 | echo "ENDPOINT_NAME: $ENDPOINT_NAME" 32 | 33 | echo "Reading endpoint traffic to figure out which deployment is staging/production" 34 | az ml online-endpoint show -n $ENDPOINT_NAME --query "traffic" -o yaml > endpoint_traffic.yml 35 | echo "Endpoint traffic:" 36 | cat endpoint_traffic.yml 37 | GREEN_TRAFFIC=$(yq .green endpoint_traffic.yml) 38 | BLUE_TRAFFIC=$(yq .blue endpoint_traffic.yml) 39 | if [ $GREEN_TRAFFIC == 0 ]; then 40 | TEST_DEPLOYMENT_NAME='green' 41 | fi 42 | if [ $BLUE_TRAFFIC == 0 ]; then 43 | TEST_DEPLOYMENT_NAME='blue' 44 | fi 45 | 46 | TEST_RESPONSE=$(az ml online-endpoint invoke -n $ENDPOINT_NAME --deployment $TEST_DEPLOYMENT_NAME --request-file ${{ inputs.requestFile }}) 47 | # TODO: test that response is valid, fail with exit 1 if not -------------------------------------------------------------------------------- /.github/actions/aml-job-create/action.yaml: -------------------------------------------------------------------------------- 1 | name: Submitting job 2 | description: 'Creates and submits a new job to Azure ML based on a job configuration. Jobs are named using the provided job name and a unique run id returned by GitHub.' 3 | 4 | inputs: 5 | # name: 6 | # description: 'Name of the job to be created. Note that the final name of the job will be the given name followed by the number of the build run `github.run_id`. This value is provided as an output.' 7 | # required: true 8 | jobFile: 9 | description: 'Path to the job file.' 10 | required: true 11 | # workspaceName: 12 | # description: 'Name of the workspace to work against.' 13 | # required: true 14 | # resourceGroup: 15 | # description: 'Name of the resource group where the workspace is placed.' 16 | # required: true 17 | # noWait: 18 | # description: 'Indicates if the action should not wait for the job to finish.' 19 | # required: false 20 | # default: 'false' 21 | 22 | # outputs: 23 | # jobName: 24 | # description: Name of the job created in the workspace.
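# For reference, workshop_unit_test.yml in this repo calls this action as:
#   - uses: ./.github/actions/aml-job-create
#     with:
#       jobFile: src/workshop/core/data_engineering/feature_engineering.yml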
25 | # value: ${{ steps.jobRun.outputs.jobName }} 26 | 27 | runs: 28 | using: "composite" 29 | steps: 30 | - name: Run AML Job 31 | id: jobRun 32 | shell: bash 33 | run: | 34 | run_id=$(az ml job create -f ${{ inputs.jobFile }} --query name -o tsv) 35 | if [[ -z "$run_id" ]] 36 | then 37 | echo "Job creation failed" 38 | exit 3 39 | fi 40 | az ml job show -n $run_id --web 41 | status=$(az ml job show -n $run_id --query status -o tsv) 42 | if [[ -z "$status" ]] 43 | then 44 | echo "Status query failed" 45 | exit 4 46 | fi 47 | running=("Queued" "Starting" "Preparing" "Running" "Finalizing") 48 | while [[ ${running[*]} =~ $status ]] 49 | do 50 | sleep 15 51 | status=$(az ml job show -n $run_id --query status -o tsv) 52 | echo $status 53 | done 54 | if [[ "$status" = "Failed" ]] 55 | then 56 | echo "Training Job failed" 57 | exit 3 58 | fi 59 | -------------------------------------------------------------------------------- /.github/workflows/workshop_cd.yml: -------------------------------------------------------------------------------- 1 | name: workshop-cd 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | types: 6 | - opened 7 | branches: 8 | - main 9 | paths: 10 | - src/workshop/core/** 11 | - .github/workflows/workshop_cd.yml 12 | jobs: 13 | Workshop-Deployment: 14 | runs-on: ubuntu-latest 15 | steps: 16 | 17 | - name: Check out repository code 18 | uses: actions/checkout@v2 19 | 20 | - name: Setup python 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: '3.8' 24 | 25 | - name: Upgrade pip 26 | run: | 27 | python -m pip install --upgrade pip 28 | python -m pip install --upgrade build 29 | python -m pip install --upgrade twine 30 | 31 | - name: AZ Login 32 | uses: azure/login@v1 33 | with: 34 | creds: ${{ secrets.AZURE_SERVICE_PRINCIPAL }} #setup replace AZURE_SERVICE_PRINCIPAL with the name of your Azure credentials secret in GitHub 35 | 36 | - name: Install az ml & tools 37 | run: | 38 | az extension add -n ml -y --version 2.2.1 39 | sudo apt install jq 40 | pip install yq 41 | 42 | - name: Run deployment 43 | uses: ./.github/actions/aml-endpoint-deploy 44 | with: 45 | resourceGroup: azureml #setup replace azureml with the name of your resource group in Azure 46 | workspaceName: ws01ent #setup replace ws01ent with the name of your workspace in Azure 47 | endpointFile: src/workshop/core/scoring/endpoint.yml 48 | deploymentFile: src/workshop/core/scoring/deployment.yml 49 | modelVersion: latest 50 | 51 | - name: Test deployment 52 | uses: ./.github/actions/aml-endpoint-test 53 | with: 54 | resourceGroup: azureml #setup replace azureml with the name of your resource group in Azure 55 | workspaceName: ws01ent #setup replace ws01ent with the name of your workspace in Azure 56 | endpointFile: src/workshop/core/scoring/endpoint.yml 57 | requestFile: src/workshop/core/scoring/scoring_test_request.json 58 | 59 | - name: Swap deployment 60 | uses: ./.github/actions/aml-endpoint-swap 61 | with: 62 | resourceGroup: azureml #setup replace azureml with the name of your resource group in Azure 63 | workspaceName: ws01ent #setup replace ws01ent with the name of your workspace in Azure 64 | endpointFile: src/workshop/core/scoring/endpoint.yml -------------------------------------------------------------------------------- /.github/workflows/workshop_ci.yml: -------------------------------------------------------------------------------- 1 | name: workshop-ci 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | types: 6 | - closed 7 | branches: 8 | - integration 9 | paths: 10 | -
src/workshop/core/** 11 | - .github/workflows/workshop_ci.yml 12 | jobs: 13 | Workshop-Train-Validation: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Check out repository code 17 | uses: actions/checkout@v3 18 | - name: Setup python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: '3.8' # Version range or exact version of a Python version to use, using SemVer's version range syntax 22 | - name: Upgrade pip 23 | run: | 24 | python -m pip install --upgrade pip 25 | python -m pip install --upgrade build 26 | python -m pip install --upgrade twine 27 | - name: AZ Login 28 | uses: azure/login@v1 29 | with: 30 | creds: ${{ secrets.AZURE_SERVICE_PRINCIPAL }} #setup: provide your Azure credentials name stored in github 31 | 32 | - name: Install az ml & set default values for AML 33 | run: | #setup: provide group, workspace and location 34 | az extension add -n ml -y --version 2.2.1 35 | az configure --defaults group=azureml workspace=ws01ent location=westus2 36 | - name: run training and model validation 37 | run: | 38 | az ml job create -s -f src/workshop/core/pipelines/training_pipeline.yml 39 | 40 | - name: Create Pull Request to Main 41 | uses: thomaseizinger/create-pull-request@master 42 | with: 43 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN_GITHUB }} #setup: provide your github secret name 44 | head: ${{ github.ref }} 45 | base: main 46 | title: "An automatically created PR to main by successful CI" 47 | 48 | -------------------------------------------------------------------------------- /.github/workflows/workshop_unit_test.yml: -------------------------------------------------------------------------------- 1 | name: feature_engineering_unit_test 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches-ignore: 6 | - main 7 | - integration 8 | paths: 9 | - src/workshop/core/data_engineering/* 10 | - .github/workflows/workshop_unit_test.yml 11 | 12 | jobs: 13 | unit-test: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Check out repository code 17 | uses: actions/checkout@v3 18 | - name: Setup python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: '3.8' # Version range or exact version of a Python version to use, using SemVer's version range syntax 22 | - name: Upgrade pip 23 | run: | 24 | python -m pip install --upgrade pip 25 | python -m pip install --upgrade build 26 | python -m pip install --upgrade twine 27 | - name: AZ Login 28 | uses: azure/login@v1 29 | with: 30 | creds: ${{ secrets.AZURE_SERVICE_PRINCIPAL }} # SETUP: replace AZURE_SERVICE_PRINCIPAL with your own secret name 31 | - name: Install AZ ML and tools 32 | run: | # SETUP line 34 to point to your own AML workspace 33 | az extension add -n ml -y --version 2.2.1 34 | az configure --defaults group=azureml workspace=ws01ent location=westus2 35 | - name: Run Feature Engineering 36 | uses: ./.github/actions/aml-job-create 37 | with: 38 | jobFile: src/workshop/core/data_engineering/feature_engineering.yml 39 | 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | src/workshop/data/*.parquet 3 | src/workshop/data/*.joblib 4 | *.amlignore 5 | *.amltmp 6 | *.ipynb_aml_checkpoints 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 
| lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/cd.yml: -------------------------------------------------------------------------------- 1 | # Azure DevOps Pipeline to Run a Databricks Job 2 | # This uses bash scripts to invoke the Databricks API and start a job. 3 | # First we use the service principal's credentials to get a token from Entra 4 | # Then we use that token to make an HTTP call to the Databricks API 5 | 6 | # This pipeline expects the following variables: 7 | # - tenant_id: The ID of your Entra tenant (should be a guid) 8 | # - sp_client_id: The service principal's client ID (should be a guid) 9 | # - sp_credential: The service principal's credential (should be marked as a secret) 10 | # - databricks_workspace_uri: The URI for the Databricks workspace (without the trailing slash) 11 | # - ado_username: username for Azure DevOps with repo access to share with service principal 12 | # - ado_username_pat: ADO personal_access_token for username 13 | 14 | trigger: 15 | branches: 16 | exclude: 17 | - integration 18 | include: 19 | - main 20 | paths: 21 | include: 22 | - src/workshop/notebooks/part_1_1_data_prep.ipynb 23 | - src/workshop/notebooks/part_1_2_training.ipynb 24 | - src/workshop/notebooks/part_1_3_evaluating.ipynb 25 | - .azure_pipelines/cd.yml 26 | 27 | pool: 28 | vmImage: ubuntu-latest 29 | 30 | variables: 31 | - group: mlops-ado-adb-variables 32 | - name: BRANCH_NAME 33 | value: $[replace(variables['Build.SourceBranch'], 'refs/heads/', '')] 34 | 35 | steps: 36 | - script: | 37 | token=$(curl -s -X POST -H 'Content-Type: application/x-www-form-urlencoded' \ 38 | https://login.microsoftonline.com/$(tenant_id)/oauth2/v2.0/token \ 39 | -d 'client_id=$(sp_client_id)' \ 40 | -d 'grant_type=client_credentials' \ 41 | -d 'scope=2ff814a6-3304-4ab8-85cb-cd0e6f879c1d%2F.default' \ 42 | -d 'client_secret='"$SP_CREDENTIAL"'' \ 43 | | jq -r '.access_token') 44 | 45 | echo "##vso[task.setvariable variable=token;issecret=true]$token" 46 | 47 | displayName: 'Get Entra ID token' 48 | env: 49 | SP_CREDENTIAL: $(sp_credential) 50 | 51 | - script: | 52 | result=$(curl -s -X GET \ 53 | -H 'Authorization: Bearer '"$(token)"'' \ 54 | $(databricks_workspace_uri)/api/2.0/git-credentials) 55 | 56 | for cred in $(echo "${result}" | jq -c '.credentials[] | {credential_id}'); do 57 | echo "Deleting credentials" 58 | echo $cred 59 | cred_id=$(echo 
$cred | jq -r .credential_id) 60 | del_result=$(curl -s -X DELETE \ 61 | -H 'Authorization: Bearer '"$(token)"'' \ 62 | $(databricks_workspace_uri)/api/2.0/git-credentials/${cred_id}) 63 | done 64 | 65 | result=$(curl -s -X POST \ 66 | -H 'Authorization: Bearer '"$(token)"'' \ 67 | -H 'Content-Type: application/json' \ 68 | -d '{ 69 | "git_provider": "AzureDevOpsServices", 70 | "personal_access_token": "$(ado_username_pat)", 71 | "git_username": "$(ado_username)" 72 | }' \ 73 | $(databricks_workspace_uri)/api/2.0/git-credentials) 74 | 75 | echo $result 76 | 77 | displayName: 'Refresh Git Credentials' 78 | 79 | - script: | 80 | cluster_def='{ 81 | "spark_version": "13.2.x-cpu-ml-scala2.12", 82 | "spark_conf": { 83 | "spark.databricks.delta.preview.enabled": "true", 84 | "spark.master": "local[*, 4]", 85 | "spark.databricks.cluster.profile": "singleNode" 86 | }, 87 | "azure_attributes": { 88 | "first_on_demand": 1, 89 | "availability": "ON_DEMAND_AZURE", 90 | "spot_bid_max_price": -1 91 | }, 92 | "node_type_id": "Standard_D4a_v4", 93 | "driver_node_type_id": "Standard_D4a_v4", 94 | "custom_tags": { 95 | "ResourceClass": "SingleNode" 96 | }, 97 | "spark_env_vars": { 98 | "PYSPARK_PYTHON": "/databricks/python3/bin/python3" 99 | }, 100 | "enable_elastic_disk": true, 101 | "data_security_mode": "LEGACY_SINGLE_USER_STANDARD", 102 | "runtime_engine": "STANDARD", 103 | "num_workers": 0 104 | }' 105 | 106 | result=$(curl -s -X POST \ 107 | -H 'Authorization: Bearer '"$(token)"'' \ 108 | -H 'Content-Type: application/json' \ 109 | -d '{ 110 | "run_name": "Model Eval on Prod Data Workflow - '"$(BRANCH_NAME)"'", 111 | "tasks": [ 112 | { 113 | "task_key": "model_evaluation", 114 | "notebook_task": { 115 | "notebook_path": "src/workshop/notebooks/part_1_3_evaluating", 116 | "source": "GIT", 117 | "base_parameters": { 118 | "run_name": "'"$(BRANCH_NAME)"'", 119 | "devops_action": "Deployment" 120 | } 121 | }, 122 | "new_cluster": '"$cluster_def"' 123 | } 124 | 125 | ], 126 | "git_source": { 127 | "git_provider": "azureDevOpsServices", 128 | "git_url": "'"$(System.CollectionUri)$(System.TeamProject)"/_git/"$(Build.Repository.Name)"'", 129 | "git_branch": "'"$(BRANCH_NAME)"'" 130 | }, 131 | "access_control_list": [ 132 | { 133 | "group_name": "users", 134 | "permission_level": "CAN_VIEW" 135 | } 136 | ] 137 | }' \ 138 | $(databricks_workspace_uri)/api/2.1/jobs/runs/submit) 139 | 140 | echo Using Git URL: "'"$(System.CollectionUri)$(System.TeamProject)"/_git/"$(Build.Repository.Name)"'" 141 | 142 | echo $result 143 | 144 | displayName: 'Run Production Model Evaluation Databricks Workflow via API' -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/ci.yml: -------------------------------------------------------------------------------- 1 | # Azure DevOps Pipeline to Run a Databricks Job 2 | # This uses bash scripts to invoke the Databricks API and start a job. 
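# (The scope GUID 2ff814a6-3304-4ab8-85cb-cd0e6f879c1d used below is the well-known application ID
# of the Azure Databricks resource in Entra, so the resulting token is valid for the Databricks REST
# API. A quick smoke test of such a token, assuming the pipeline variables are set, might be:
#   curl -s -H "Authorization: Bearer $token" "$databricks_workspace_uri/api/2.0/clusters/list" )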
3 | # First we use the service principal's credentials to get a token from Entra 4 | # Then we use that token to make an HTTP call to the Databricks API 5 | 6 | # This pipeline expects the following variables: 7 | # - tenant_id: The ID of your Entra tenant (should be a guid) 8 | # - sp_client_id: The service principal's client ID (should be a guid) 9 | # - sp_credential: The service principal's credential (should be marked as a secret) 10 | # - databricks_workspace_uri: The URI for the Databricks workspace (without the trailing slash) 11 | # - ado_username: username for Azure DevOps with repo access to share with service principal 12 | # - ado_username_pat: ADO personal_access_token for username 13 | 14 | trigger: 15 | branches: 16 | exclude: 17 | - main 18 | include: 19 | - integration 20 | paths: 21 | include: 22 | - src/workshop/notebooks/part_1_1_data_prep.ipynb 23 | - src/workshop/notebooks/part_1_2_training.ipynb 24 | - src/workshop/notebooks/part_1_3_evaluating.ipynb 25 | - .azure_pipelines/ci.yml 26 | 27 | pool: 28 | vmImage: ubuntu-latest 29 | 30 | variables: 31 | - group: mlops-ado-adb-variables 32 | - name: BRANCH_NAME 33 | value: $[replace(variables['Build.SourceBranch'], 'refs/heads/', '')] 34 | 35 | steps: 36 | - script: | 37 | token=$(curl -s -X POST -H 'Content-Type: application/x-www-form-urlencoded' \ 38 | https://login.microsoftonline.com/$(tenant_id)/oauth2/v2.0/token \ 39 | -d 'client_id=$(sp_client_id)' \ 40 | -d 'grant_type=client_credentials' \ 41 | -d 'scope=2ff814a6-3304-4ab8-85cb-cd0e6f879c1d%2F.default' \ 42 | -d 'client_secret='"$SP_CREDENTIAL"'' \ 43 | | jq -r '.access_token') 44 | 45 | echo "##vso[task.setvariable variable=token;issecret=true]$token" 46 | 47 | displayName: 'Get Entra ID token' 48 | env: 49 | SP_CREDENTIAL: $(sp_credential) 50 | 51 | - script: | 52 | result=$(curl -s -X GET \ 53 | -H 'Authorization: Bearer '"$(token)"'' \ 54 | $(databricks_workspace_uri)/api/2.0/git-credentials) 55 | 56 | for cred in $(echo "${result}" | jq -c '.credentials[] | {credential_id}'); do 57 | echo "Deleting credentials" 58 | echo $cred 59 | cred_id=$(echo $cred | jq -r .credential_id) 60 | del_result=$(curl -s -X DELETE \ 61 | -H 'Authorization: Bearer '"$(token)"'' \ 62 | $(databricks_workspace_uri)/api/2.0/git-credentials/${cred_id}) 63 | done 64 | 65 | result=$(curl -s -X POST \ 66 | -H 'Authorization: Bearer '"$(token)"'' \ 67 | -H 'Content-Type: application/json' \ 68 | -d '{ 69 | "git_provider": "AzureDevOpsServices", 70 | "personal_access_token": "$(ado_username_pat)", 71 | "git_username": "$(ado_username)" 72 | }' \ 73 | $(databricks_workspace_uri)/api/2.0/git-credentials) 74 | 75 | echo $result 76 | 77 | displayName: 'Refresh Git Credentials' 78 | 79 | - script: | 80 | cluster_def='{ 81 | "spark_version": "13.2.x-cpu-ml-scala2.12", 82 | "spark_conf": { 83 | "spark.databricks.delta.preview.enabled": "true", 84 | "spark.master": "local[*, 4]", 85 | "spark.databricks.cluster.profile": "singleNode" 86 | }, 87 | "azure_attributes": { 88 | "first_on_demand": 1, 89 | "availability": "ON_DEMAND_AZURE", 90 | "spot_bid_max_price": -1 91 | }, 92 | "node_type_id": "Standard_D4a_v4", 93 | "driver_node_type_id": "Standard_D4a_v4", 94 | "custom_tags": { 95 | "ResourceClass": "SingleNode" 96 | }, 97 | "spark_env_vars": { 98 | "PYSPARK_PYTHON": "/databricks/python3/bin/python3" 99 | }, 100 | "enable_elastic_disk": true, 101 | "data_security_mode": "LEGACY_SINGLE_USER_STANDARD", 102 | "runtime_engine": "STANDARD", 103 | "num_workers": 0 104 | }' 105 | 106 | result=$(curl -s 
-X POST \ 107 | -H 'Authorization: Bearer '"$(token)"'' \ 108 | -H 'Content-Type: application/json' \ 109 | -d '{ 110 | "run_name": "Model Training Workflow - '"$(BRANCH_NAME)"'", 111 | "tasks": [ 112 | { 113 | "task_key": "data_prep", 114 | "notebook_task": { 115 | "notebook_path": "src/workshop/notebooks/part_1_1_data_prep", 116 | "source": "GIT", 117 | "base_parameters": { 118 | "run_name": "'"$(BRANCH_NAME)"'" 119 | } 120 | }, 121 | "new_cluster": '"$cluster_def"' 122 | }, 123 | { 124 | "task_key": "model_training", 125 | "notebook_task": { 126 | "notebook_path": "src/workshop/notebooks/part_1_2_training", 127 | "source": "GIT", 128 | "base_parameters": { 129 | "run_name": "'"$(BRANCH_NAME)"'" 130 | } 131 | }, 132 | "depends_on": [ {"task_key": "data_prep"} ], 133 | "new_cluster": '"$cluster_def"' 134 | }, 135 | { 136 | "task_key": "model_evaluation", 137 | "notebook_task": { 138 | "notebook_path": "src/workshop/notebooks/part_1_3_evaluating", 139 | "source": "GIT", 140 | "base_parameters": { 141 | "run_name": "'"$(BRANCH_NAME)"'", 142 | "devops_action": "Integration" 143 | } 144 | }, 145 | "depends_on": [ {"task_key": "model_training"} ], 146 | "new_cluster": '"$cluster_def"' 147 | } 148 | 149 | ], 150 | "git_source": { 151 | "git_provider": "azureDevOpsServices", 152 | "git_url": "'"$(System.CollectionUri)$(System.TeamProject)"/_git/"$(Build.Repository.Name)"'", 153 | "git_branch": "'"$(BRANCH_NAME)"'" 154 | }, 155 | "access_control_list": [ 156 | { 157 | "group_name": "users", 158 | "permission_level": "CAN_VIEW" 159 | } 160 | ] 161 | }' \ 162 | $(databricks_workspace_uri)/api/2.1/jobs/runs/submit) 163 | 164 | echo Using Git URL: "'"$(System.CollectionUri)$(System.TeamProject)"/_git/"$(Build.Repository.Name)"'" 165 | 166 | echo $result 167 | 168 | displayName: 'Run Model Training Databricks Workflow via API' 169 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/move_model.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | branches: 3 | include: 4 | - dev 5 | paths: 6 | include: 7 | - src/* 8 | 9 | variables: 10 | - template: variables.yml 11 | 12 | pool: 13 | vmImage: ubuntu-latest 14 | 15 | stages: 16 | - stage: move_model 17 | displayName: Move Model 18 | jobs: 19 | - job: move 20 | steps: 21 | - task: ShellScript@2 22 | displayName: 'Install Requirements' 23 | inputs: 24 | scriptPath: 'src/install_requirements.sh' 25 | - template: templates/aml-model-register/step.yml 26 | parameters: 27 | azureServiceConnectionName: ${{ variables.azureServiceConnection }} 28 | azureServiceConnectionNameTarget: ${{ variables.azureServiceConnectionProd }} 29 | name: ${{ variables.name }} 30 | workspaceName: ${{ variables.workspace }} 31 | workspaceNameTarget: ${{ variables.workspaceProd }} 32 | resourceGroup: ${{ variables.resourcegroup }} 33 | resourceGroupTarget: ${{ variables.resourcegroupProd }} 34 | modelPath: ${{ variables.modelPath }} -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/templates/aml-batch-score-deploy/step.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: azureServiceConnectionName 3 | type: string 4 | - name: endpointFile 5 | type: string 6 | - name: deploymentFile 7 | type: string 8 | - name: modelVersion 9 | type: string 10 | default: 11 | - name: workspaceName 12 | type: string 13 | - name: resourceGroup 14 | type: string 15 | -
name: noWait 16 | type: boolean 17 | default: false 18 | - name: args 19 | type: string 20 | default: 21 | - name: secretsToKeyVault 22 | type: boolean 23 | default: false 24 | - name: keyVaultName 25 | type: string 26 | default: 27 | 28 | steps: 29 | - task: AzureCLI@2 30 | name: deployment 31 | displayName: Deploying endpoint 32 | inputs: 33 | scriptType: bash 34 | scriptLocation: inlineScript 35 | azureSubscription: ${{ parameters.azureServiceConnectionName }} 36 | inlineScript: | 37 | set -e #Fail on errors 38 | az configure --defaults workspace=${{ parameters.workspaceName }} group=${{ parameters.resourceGroup }} 39 | ENDPOINT_FILES=$(find ${{ parameters.endpointFile }}) 40 | for ENDPOINT_FILE in $ENDPOINT_FILES 41 | do 42 | ENDPOINT_FOLDER=$(dirname $ENDPOINT_FILE) 43 | ENDPOINT_NAME=$(yq -r ".name" $ENDPOINT_FILE) 44 | ENDPOINT_AUTH=$(yq -r ".auth_mode" $ENDPOINT_FILE) 45 | # We remove the traffic key since it creates a chicken-and-egg issue: .traffic references deployments 46 | # that must exist first, but deployments cannot be created without the endpoint. 47 | echo "##[debug]Rewriting endpoint file without traffic" 48 | yq -y "del(.traffic)" $ENDPOINT_FILE > $ENDPOINT_NAME.yml 49 | echo "##[debug]Creating endpoint with name: $ENDPOINT_NAME" 50 | if [[ $(az ml batch-endpoint show -n $ENDPOINT_NAME) ]]; then 51 | echo "##[debug]Endpoint $ENDPOINT_NAME already exists. Creation skipped." 52 | if [[ $(az ml batch-endpoint show -n $ENDPOINT_NAME | yq -r .auth_mode) != "$ENDPOINT_AUTH" ]]; then 53 | echo "##vso[task.logissue type=warning;sourcepath=$ENDPOINT_FILE;]Endpoint $ENDPOINT_NAME indicates a different authentication method that requires redeployment." 54 | fi 55 | else 56 | az ml batch-endpoint create -f $ENDPOINT_NAME.yml 57 | fi 58 | # echo "##[debug]Retrieving URL and credentials" 59 | # SCORING_URI=$(az ml batch-endpoint show -n $ENDPOINT_NAME | jq -r ".scoring_uri") 60 | # SCORING_KEY=$(az ml batch-endpoint get-credentials -n $ENDPOINT_NAME -o tsv --query primaryKey) 61 | 62 | echo "##[debug]Looking for deployments in folder $ENDPOINT_FOLDER/${{ parameters.deploymentFile }}" 63 | DEPLOYMENT_FILES=$(find $ENDPOINT_FOLDER/${{ parameters.deploymentFile }}) 64 | 65 | for DEPLOYMENT_FILE in $DEPLOYMENT_FILES 66 | do 67 | echo "##[debug]Working on deployment file $DEPLOYMENT_FILE" 68 | DEPLOYMENT_NAME=$(yq -r ".name" $DEPLOYMENT_FILE) 69 | DEPLOYMENT_MODEL=$(yq -r ".model" $DEPLOYMENT_FILE | cut -d: -f2) 70 | DEPLOYMENT_MODEL_VERSION=$(yq -r ".model" $DEPLOYMENT_FILE | cut -d: -f3) 71 | # User can overwrite the version in the YAML 72 | if [[ "${{ parameters.modelVersion }}" == "" ]]; then 73 | TARGET_MODEL_VERSION=$DEPLOYMENT_MODEL_VERSION 74 | else 75 | echo "##[debug]Model being targeted is being overwritten with version ${{ parameters.modelVersion }}" 76 | TARGET_MODEL_VERSION=${{ parameters.modelVersion }} 77 | fi 78 | 79 | echo "##[debug]Working on deployment with name: $ENDPOINT_NAME/$DEPLOYMENT_NAME" 80 | if [[ "$TARGET_MODEL_VERSION" == "current" ]]; then 81 | echo "##[debug]Identifying current version of the model at deployment $ENDPOINT_NAME/$DEPLOYMENT_NAME" 82 | MODEL_CURRENT_URL=$(az ml batch-deployment show --name $DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME -g ${{ parameters.resourceGroup }} -w ${{ parameters.workspaceName }} | jq -r ".model") 83 | MODEL_CURRENT=$(basename $MODEL_CURRENT_URL) 84 | echo "##[debug]Updating yaml files with current model version: $MODEL_CURRENT" 85 | sed -i 's/:'$DEPLOYMENT_MODEL_VERSION'/:'$MODEL_CURRENT'/' $DEPLOYMENT_FILE
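# e.g. (hypothetical values) with DEPLOYMENT_MODEL_VERSION=3 and MODEL_CURRENT=7, the sed above
# turns a reference like azureml:nyc-fare-model:3 into azureml:nyc-fare-model:7 in the deployment file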
86 | fi 87 | if [[ "$TARGET_MODEL_VERSION" == "latest" ]]; then 88 | echo "##[debug]Identifying latest version of the model $DEPLOYMENT_MODEL" 89 | MODEL_LATEST=$(az ml model list --name $DEPLOYMENT_MODEL -g ${{ parameters.resourceGroup }} -w ${{ parameters.workspaceName }} | jq -r '.[0].version') 90 | 91 | echo "##[debug]Updating yaml files with latest model version: $MODEL_LATEST" 92 | sed -i 's/:'$DEPLOYMENT_MODEL_VERSION'/:'$MODEL_LATEST'/' $DEPLOYMENT_FILE 93 | fi 94 | if [[ "$TARGET_MODEL_VERSION" == *=* ]]; then 95 | echo "##[debug]Identifying version of the model $DEPLOYMENT_MODEL with tags $TARGET_MODEL_VERSION" 96 | TARGET_MODEL_TAG=$(echo $TARGET_MODEL_VERSION | cut -d= -f1) 97 | TARGET_MODEL_TVALUE=$(echo $TARGET_MODEL_VERSION | cut -d= -f2) 98 | MODEL_TAGGED=$(az ml model list -n $DEPLOYMENT_MODEL | jq -r --arg TARGET_MODEL_TAG $TARGET_MODEL_TAG --arg TARGET_MODEL_TVALUE $TARGET_MODEL_TVALUE '.[] | select(.tags[$TARGET_MODEL_TAG] == $TARGET_MODEL_TVALUE) | .version') 99 | echo "##[debug]Updating yaml files with model version: $MODEL_TAGGED" 100 | sed -i 's/:'$DEPLOYMENT_MODEL_VERSION'/:'$MODEL_TAGGED'/' $DEPLOYMENT_FILE 101 | fi 102 | 103 | echo "##[debug]Creating deployment with name: $ENDPOINT_NAME/$DEPLOYMENT_NAME" 104 | if ${{ lower(parameters.noWait) }}; then 105 | az ml batch-deployment create -f $DEPLOYMENT_FILE --only-show-errors --no-wait --set-default 106 | else 107 | az ml batch-deployment create -f $DEPLOYMENT_FILE --set-default 108 | 109 | 110 | fi 111 | 112 | # echo "##[debug]Updating properties for deployment" 113 | # BRANCH_REF=$(Build.SourceBranch) 114 | # az ml batch-deployment update --name $DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME --set tags.'Git commit'=$(Build.SourceVersion) 115 | # az ml batch-deployment update --name $DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME --set tags.'Git branch'=${BRANCH_REF#refs/*/} 116 | # az ml batch-deployment update --name $DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME --set tags.'Git repository'=$(Build.Repository.Uri) 117 | 118 | echo "##[debug]Deployment completed" 119 | done 120 | 121 | # if ${{ lower(parameters.secretsToKeyVault) }}; then 122 | # echo "##[debug]Uploading secrets to key vault ${{ parameters.keyVaultName }}" 123 | # az keyvault secret set --vault-name ${{ parameters.keyVaultName }} --name ${ENDPOINT_NAME//-/}ScoringUrl --value $SCORING_URI 124 | # az keyvault secret set --vault-name ${{ parameters.keyVaultName }} --name ${ENDPOINT_NAME//-/}ScoringKey --value $SCORING_KEY 125 | # fi 126 | 127 | echo "##[debug]Getting deployed version for model at file $DEPLOYMENT_FILE" 128 | DEPLOYED_VERSION=$(yq -r ".model" $DEPLOYMENT_FILE | cut -d: -f3) 129 | echo "##vso[task.setvariable variable=deployedVersion;isOutput=true]$DEPLOYED_VERSION" 130 | echo "##[debug]Deployed version is: $DEPLOYED_VERSION" 131 | 132 | echo "##[debug]Endpoint evaluation completed" 133 | done -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/templates/aml-job-create/step.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: azureServiceConnectionName 3 | type: string 4 | - name: name 5 | type: string 6 | - name: jobFile 7 | type: string 8 | - name: workspaceName 9 | type: string 10 | - name: resourceGroup 11 | type: string 12 | - name: noWait 13 | type: boolean 14 | default: false 15 | - name: stepDisplayName 16 | type: string 17 | default: Submitting job 18 | 19 | steps: 20 | - task: AzureCLI@2 21 | name: 
jobRun 22 | displayName: ${{ parameters.stepDisplayName }} 23 | inputs: 24 | scriptType: bash 25 | scriptLocation: inlineScript 26 | azureSubscription: ${{ parameters.azureServiceConnectionName }} 27 | inlineScript: | 28 | JOB_NAME="${{ parameters.name }}-$(Build.BuildId)" 29 | echo "##[debug]Creating job with name: $JOB_NAME" 30 | echo "##vso[task.setvariable variable=jobName;isOutput=true]$JOB_NAME" 31 | if ${{ lower(parameters.noWait) }}; then 32 | az ml job create -n $JOB_NAME -f ${{ parameters.jobFile }} --resource-group ${{ parameters.resourceGroup }} --workspace-name ${{ parameters.workspaceName }} 33 | else 34 | az ml job create -n $JOB_NAME -f ${{ parameters.jobFile }} --resource-group ${{ parameters.resourceGroup }} --workspace-name ${{ parameters.workspaceName }} --stream >> job.log 35 | fi 36 | target: 37 | settableVariables: 38 | - jobName 39 | - task: PublishPipelineArtifact@1 40 | displayName: Uploading job logs 41 | condition: and(succeededOrFailed(), and(eq('${{ parameters.noWait }}', 'false'), ne(variables['jobRun.jobName'], ''))) 42 | inputs: 43 | artifactName: ${{ parameters.name }}-log 44 | targetPath: job.log -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/templates/aml-model-compare/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/.azure_pipelines/templates/aml-model-compare/evaluate.py -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/templates/aml-model-compare/step.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/.azure_pipelines/templates/aml-model-compare/step.yml -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/templates/aml-model-register/step.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: azureServiceConnectionName 3 | type: string 4 | - name: name 5 | type: string 6 | - name: modelPath 7 | type: string 8 | - name: workspaceName 9 | type: string 10 | - name: resourceGroup 11 | type: string 12 | - name: azureServiceConnectionNameTarget 13 | type: string 14 | - name: workspaceNameTarget 15 | type: string 16 | - name: resourceGroupTarget 17 | type: string 18 | steps: 19 | - task: AzureCLI@2 20 | displayName: Downloading model 21 | inputs: 22 | scriptType: bash 23 | scriptLocation: inlineScript 24 | azureSubscription: ${{ parameters.azureServiceConnectionName }} 25 | workingDirectory: $(System.DefaultWorkingDirectory) 26 | inlineScript: | 27 | MODEL_LATEST=$(az ml model list --name ${{ parameters.name }} -g ${{ parameters.resourceGroup }} -w ${{ parameters.workspaceName }} | jq -r '.[0].version') 28 | az ml model download --name ${{ parameters.name }} --version $MODEL_LATEST --resource-group ${{ parameters.resourceGroup }} --workspace-name ${{ parameters.workspaceName }} 29 | - task: AzureCLI@2 30 | displayName: Registering model 31 | inputs: 32 | scriptType: bash 33 | scriptLocation: inlineScript 34 | azureSubscription: ${{ parameters.azureServiceConnectionNameTarget }} 35 | workingDirectory: $(System.DefaultWorkingDirectory) 36 | inlineScript: | 37 | az ml model create --name "${{ parameters.name }}" 
--path "${{ parameters.name}}/${{ parameters.modelPath }}" --resource-group ${{ parameters.resourceGroupTarget }} --workspace-name ${{ parameters.workspaceNameTarget }} 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/templates/aml-model-register/step2.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | - name: azureServiceConnectionName 3 | type: string 4 | - name: name 5 | type: string 6 | - name: description 7 | type: string 8 | default: 9 | - name: fromJob 10 | type: boolean 11 | default: false 12 | - name: fromAnotherWorkspace 13 | type: boolean 14 | default: false 15 | - name: jobName 16 | type: string 17 | default: 18 | - name: modelPath 19 | type: string 20 | - name: modelType 21 | type: string 22 | default: custom_model 23 | - name: workspaceName 24 | type: string 25 | default: 26 | - name: resourceGroup 27 | type: string 28 | default: 29 | steps: 30 | - task: AzureCLI@2 31 | displayName: Registering model 32 | inputs: 33 | scriptType: bash 34 | scriptLocation: inlineScript 35 | azureSubscription: ${{ parameters.azureServiceConnectionName }} 36 | inlineScript: | 37 | if ${{ lower(parameters.fromJob) }}; then 38 | if ${{ lower(parameters.fromAnotherWorkspace) }}; then 39 | echo "##[debug]Downloading assets from job ${{ parameters.jobName }}" 40 | az ml job download --name ${{ parameters.jobName }} --resource-group ${{ parameters.resourceGroup }} --workspace-name ${{ parameters.workspaceName }} 41 | echo "##[debug]$(ls)" 42 | MODEL_PATH="${{ parameters.jobName }}/${{ parameters.modelPath }}" 43 | else 44 | MODEL_PATH="azureml://jobs/${{ parameters.jobName }}/outputs/artifacts/${{ parameters.modelPath }}" 45 | fi 46 | else 47 | if ! test -f "${{ parameters.modelPath }}"; then 48 | echo "##vso[task.logissue type=error]File ${{ parameters.modelPath }} not found."
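# (for comparison, the fromJob branch above yields paths of the form
# azureml://jobs/<job_name>/outputs/artifacts/<model_path>)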
49 | exit 1 50 | fi 51 | MODEL_PATH="${{ parameters.modelPath }}" 52 | fi 53 | echo "##[debug]Creating model from path $MODEL_PATH" 54 | az ml model create --name "${{ parameters.name }}" --description "${{ parameters.description }}" --type "${{ parameters.modelType }}" --path $MODEL_PATH --resource-group ${{ parameters.resourceGroup }} --workspace-name ${{ parameters.workspaceName }} 55 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/templates/devops-create-pr/step.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/.azure_pipelines/templates/devops-create-pr/step.yml -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/variables.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | azureServiceConnection: "mlws-sp" 3 | jobname: "auto-ml-train" 4 | workspace: "mlws" 5 | resourcegroup: "trial" 6 | name: "regmodel" 7 | azureServiceConnectionProd: "mcw-sp" 8 | workspaceProd: "quick-start-ws" 9 | resourcegroupProd: "MCW-MLOps" 10 | modelPath: "model" -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.azure_pipelines/workshop_unit_test.yml: -------------------------------------------------------------------------------- 1 | # Azure DevOps Pipeline to Run a Databricks Job 2 | # This uses bash scripts to invoke the Databricks API and start a job. 3 | # First we use the service principal's credentials to get a token from Entra 4 | # Then we use that token to make an HTTP call to the Databricks API. 5 | 6 | # When we run the notebook, we want to pull the latest version of it from the AzDO repo. 7 | # To do this, we would like to use the SP's credentials to pull the files from Git. 8 | # AzDO now supports SP's connecting to repos... but Databricks does not yet support 9 | # this in their git client. Therefore, we still have to use a PAT for a regular 10 | # AzDO user. 
:-( 11 | 12 | # This pipeline expects the following variables: 13 | # - tenant_id: The ID of your Entra tenant (should be a guid) 14 | # - sp_client_id: The service principal's client ID (should be a guid) 15 | # - sp_credential: The service principal's credential (should be marked as a secret) 16 | # - databricks_workspace_uri: The URI for the Databricks workspace (without the trailing slash) 17 | # - ado_username: username for Azure DevOps with repo access to share with service principal 18 | # - ado_username_pat: ADO personal_access_token for username 19 | 20 | trigger: 21 | branches: 22 | exclude: 23 | - main 24 | - integration 25 | paths: 26 | include: 27 | - src/workshop/notebooks/part_1_1_data_prep.ipynb 28 | - src/workshop/notebooks/test_params.py 29 | - .azure_pipelines/workshop_unit_test.yml 30 | 31 | pool: 32 | vmImage: ubuntu-latest 33 | 34 | variables: 35 | - group: mlops-ado-adb-variables 36 | - name: BRANCH_NAME 37 | value: $[replace(variables['Build.SourceBranch'], 'refs/heads/', '')] 38 | 39 | steps: 40 | - script: | 41 | token=$(curl -s -X POST -H 'Content-Type: application/x-www-form-urlencoded' \ 42 | https://login.microsoftonline.com/$(tenant_id)/oauth2/v2.0/token \ 43 | -d 'client_id=$(sp_client_id)' \ 44 | -d 'grant_type=client_credentials' \ 45 | -d 'scope=2ff814a6-3304-4ab8-85cb-cd0e6f879c1d%2F.default' \ 46 | -d 'client_secret='"$SP_CREDENTIAL"'' \ 47 | | jq -r '.access_token') 48 | 49 | echo "##vso[task.setvariable variable=token;issecret=true]$token" 50 | 51 | displayName: 'Get Entra ID token' 52 | env: 53 | SP_CREDENTIAL: $(sp_credential) 54 | 55 | - script: | 56 | result=$(curl -s -X GET \ 57 | -H 'Authorization: Bearer '"$(token)"'' \ 58 | $(databricks_workspace_uri)/api/2.0/git-credentials) 59 | 60 | for cred in $(echo "${result}" | jq -c '.credentials[] | {credential_id}'); do 61 | echo "Deleting credentials" 62 | echo $cred 63 | cred_id=$(echo $cred | jq -r .credential_id) 64 | del_result=$(curl -s -X DELETE \ 65 | -H 'Authorization: Bearer '"$(token)"'' \ 66 | $(databricks_workspace_uri)/api/2.0/git-credentials/${cred_id}) 67 | done 68 | 69 | result=$(curl -s -X POST \ 70 | -H 'Authorization: Bearer '"$(token)"'' \ 71 | -H 'Content-Type: application/json' \ 72 | -d '{ 73 | "git_provider": "AzureDevOpsServices", 74 | "personal_access_token": "$(ado_username_pat)", 75 | "git_username": "$(ado_username)" 76 | }' \ 77 | $(databricks_workspace_uri)/api/2.0/git-credentials) 78 | 79 | echo $result 80 | 81 | displayName: 'Refresh Git Credentials' 82 | 83 | - script: | 84 | cluster_def='{ 85 | "spark_version": "13.2.x-cpu-ml-scala2.12", 86 | "spark_conf": { 87 | "spark.databricks.delta.preview.enabled": "true", 88 | "spark.master": "local[*, 4]", 89 | "spark.databricks.cluster.profile": "singleNode" 90 | }, 91 | "azure_attributes": { 92 | "first_on_demand": 1, 93 | "availability": "ON_DEMAND_AZURE", 94 | "spot_bid_max_price": -1 95 | }, 96 | "node_type_id": "Standard_D4a_v4", 97 | "driver_node_type_id": "Standard_D4a_v4", 98 | "custom_tags": { 99 | "ResourceClass": "SingleNode" 100 | }, 101 | "spark_env_vars": { 102 | "PYSPARK_PYTHON": "/databricks/python3/bin/python3" 103 | }, 104 | "enable_elastic_disk": true, 105 | "data_security_mode": "LEGACY_SINGLE_USER_STANDARD", 106 | "runtime_engine": "STANDARD", 107 | "num_workers": 0 108 | }' 109 | 110 | result=$(curl -s -X POST \ 111 | -H 'Authorization: Bearer '"$(token)"'' \ 112 | -H 'Content-Type: application/json' \ 113 | -d '{ 114 | "run_name": "Data Prep Unit Test Pipeline - '"$(BRANCH_NAME)"'", 115 | "tasks": [ 
116 |           {
117 |             "task_key": "data_prep",
118 |             "notebook_task": {
119 |               "notebook_path": "src/workshop/notebooks/part_1_1_data_prep",
120 |               "source": "GIT",
121 |               "base_parameters": {
122 |                 "run_name": "'"$(BRANCH_NAME)"'"
123 |               }
124 |             },
125 |             "new_cluster": '"$cluster_def"'
126 |           }
127 |         ],
128 |         "git_source": {
129 |           "git_provider": "azureDevOpsServices",
130 |           "git_url": "'"$(System.CollectionUri)$(System.TeamProject)"/_git/"$(Build.Repository.Name)"'",
131 |           "git_branch": "'"$(BRANCH_NAME)"'"
132 |         },
133 |         "access_control_list": [
134 |           {
135 |             "group_name": "users",
136 |             "permission_level": "CAN_VIEW"
137 |           }
138 |         ]
139 |       }' \
140 |       $(databricks_workspace_uri)/api/2.1/jobs/runs/submit)
141 | 
142 |     echo Using Git URL: "'"$(System.CollectionUri)$(System.TeamProject)"/_git/"$(Build.Repository.Name)"'"
143 | 
144 |     echo $result
145 | 
146 |   displayName: 'Run Databricks notebook via API'
147 | 
-------------------------------------------------------------------------------- /MLOps-ADO-ADB/.github/actions/aml-endpoint-deploy/action.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy AzureML managed online endpoint
2 | description: 'Deploys a model endpoint in Azure Machine Learning Services along with all the deployments it contains. Logs are collected and uploaded.'
3 | 
4 | inputs:
5 |   resourceGroup:
6 |     description: 'Name of the resource group where the workspace is placed.'
7 |     required: true
8 |   workspaceName:
9 |     description: 'Name of the workspace to work against.'
10 |     required: true
11 |   endpointFile:
12 |     description: 'Path to the endpoint YAML file.'
13 |     required: true
14 |   deploymentFile:
15 |     description: 'Path to the deployment YAML file for the given endpoint.'
16 |     required: true
17 |   modelVersion:
18 |     description: 'Model version you want to deploy. Supports either a specific version number, or "latest". If not specified, the model version from the deployment file is used.'
19 |     required: false
20 |     default: ''
21 |   updateIfExists:
22 |     description: 'If endpoint exists, update it instead of creating a new one.'
23 | required: false 24 | default: 'false' 25 | outputs: 26 | deployedVersion: 27 | description: 'Deployed version of the model' 28 | value: ${{ steps.deployment.outputs.deployedVersion }} 29 | 30 | runs: 31 | using: "composite" 32 | steps: 33 | - name: Deploy endpoint 34 | id: deployment 35 | shell: bash 36 | run: | 37 | set -e 38 | az configure --defaults workspace=${{ inputs.workspaceName }} group=${{ inputs.resourceGroup }} 39 | 40 | ENDPOINT_FILE=${{ inputs.endpointFile }} 41 | DEPLOYMENT_FILE=${{ inputs.deploymentFile }} 42 | 43 | ENDPOINT_NAME=$(yq -r ".name" $ENDPOINT_FILE) 44 | echo "Endpoint name: $ENDPOINT_NAME" 45 | 46 | # Removing traffic if present in endpoint config as we'll manage traffic setup as part of the safe rollout 47 | echo "Rewriting endpoint file without traffic" 48 | yq -y -i "del(.traffic)" $ENDPOINT_FILE 49 | 50 | # Create or update endpoint 51 | { 52 | echo "Creating endpoint with name: $ENDPOINT_NAME" && 53 | az ml online-endpoint create -f $ENDPOINT_FILE 54 | } || { 55 | echo "Endpoint $ENDPOINT_NAME already exists" 56 | if [ ${{ inputs.updateIfExists }} == 'true' ]; then 57 | echo "Updating endpoint with name: $ENDPOINT_NAME" && 58 | az ml online-endpoint update -f $ENDPOINT_FILE 59 | else 60 | echo "Skipping update of endpoint with name: $ENDPOINT_NAME" 61 | fi 62 | } 63 | 64 | # Identify which slot should be used to stage this deployment based on current traffic 65 | echo "Reading endpoint traffic to identify target staging deployment slot" 66 | az ml online-endpoint show -n $ENDPOINT_NAME --query "traffic" -o yaml > _endpoint_traffic.yml 67 | echo "Endpoint traffic:" 68 | cat _endpoint_traffic.yml 69 | GREEN_TRAFFIC=$(yq .green _endpoint_traffic.yml) 70 | BLUE_TRAFFIC=$(yq .blue _endpoint_traffic.yml) 71 | if [[ $GREEN_TRAFFIC == null || $GREEN_TRAFFIC == 0 ]]; then 72 | STAGING_DEPLOYMENT_NAME='green'; 73 | else 74 | if [[ $BLUE_TRAFFIC == null || $BLUE_TRAFFIC == 0 ]]; then 75 | STAGING_DEPLOYMENT_NAME='blue'; 76 | else 77 | echo "::error::No staging slots available for endpoint $ENDPOINT_NAME. 
One of the green/blue slots needs to have 0% traffic.";
78 |         exit 1;
79 |       fi
80 |       fi
81 |       echo "Selected staging deployment name: $STAGING_DEPLOYMENT_NAME"
82 | 
83 |       # Updating deployment file to setup name of deployment based on staging name selected above
84 |       echo "Updating deployment name to $STAGING_DEPLOYMENT_NAME"
85 |       if [[ $STAGING_DEPLOYMENT_NAME == "blue" ]]; then
86 |         yq -y -i '.name= "blue"' $DEPLOYMENT_FILE;
87 |       else
88 |         yq -y -i '.name= "green"' $DEPLOYMENT_FILE;
89 |       fi
90 | 
91 |       # Overwrite the model version set in the deployment file with a specific version or 'latest' if specified in the workflow
92 |       DEPLOYMENT_MODEL=$(yq -r ".model" $DEPLOYMENT_FILE | cut -d: -f2)
93 |       DEPLOYMENT_MODEL_VERSION=$(yq -r ".model" $DEPLOYMENT_FILE | cut -d: -f3)
94 |       if [ -z "${{ inputs.modelVersion}}" ]; then
95 |         TARGET_MODEL_VERSION=$DEPLOYMENT_MODEL_VERSION
96 |       else
97 |         echo "Model being targeted is being overwritten with version ${{ inputs.modelVersion}}"
98 |         TARGET_MODEL_VERSION=${{ inputs.modelVersion}}
99 |       fi
100 |       if [[ "$TARGET_MODEL_VERSION" == "latest" ]]; then
101 |         echo "Identifying latest version of the model $DEPLOYMENT_MODEL"
102 |         TARGET_MODEL_VERSION=$(az ml model list --name $DEPLOYMENT_MODEL | jq -r '.[0].version')
103 |         echo "Latest version of model $DEPLOYMENT_MODEL is $TARGET_MODEL_VERSION"
104 |       fi
105 |       if [[ $TARGET_MODEL_VERSION != $DEPLOYMENT_MODEL_VERSION ]]; then
106 |         echo "Updating deployment file with model version: $TARGET_MODEL_VERSION"
107 |         sed -i 's/:'$DEPLOYMENT_MODEL_VERSION'/:'$TARGET_MODEL_VERSION'/' $DEPLOYMENT_FILE
108 |       fi
109 |       echo "deployedVersion=$TARGET_MODEL_VERSION" >> "$GITHUB_OUTPUT" # ::set-output is deprecated; write step outputs to $GITHUB_OUTPUT
110 | 
111 |       # Create deployment
112 |       echo "Creating deployment with name: $ENDPOINT_NAME/$STAGING_DEPLOYMENT_NAME"
113 |       az ml online-deployment create -f $DEPLOYMENT_FILE --only-show-errors --set tags.git_commit=${GITHUB_SHA}
114 |       echo "Deployment completed"
115 | 
116 |       # Saving logs
117 |       echo "Acquiring logs for deployment with name: $ENDPOINT_NAME/$STAGING_DEPLOYMENT_NAME"
118 |       mkdir -p logs
119 |       az ml online-deployment get-logs --name $STAGING_DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME >> logs/${ENDPOINT_NAME}_${STAGING_DEPLOYMENT_NAME}.log # braces keep bash from parsing $ENDPOINT_NAME_ as a single variable name
120 | 
121 |   - name: Upload deployment logs
122 |     uses: actions/upload-artifact@v2
123 |     if: ${{ (failure() || success()) }}
124 |     with:
125 |       name: deployment-logs
126 |       path: logs/* -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.github/actions/aml-endpoint-swap/action.yaml: -------------------------------------------------------------------------------- 1 | name: Swap AzureML managed online endpoint deployments
2 | description: 'Swaps green/blue deployments of an Azure ML endpoint by switching traffic around between endpoint deployments.'
3 | 
4 | inputs:
5 |   resourceGroup:
6 |     description: 'Name of the resource group where the workspace is placed.'
7 |     required: true
8 |   workspaceName:
9 |     description: 'Name of the workspace to work against.'
10 |     required: true
11 |   endpointFile:
12 |     description: 'Path to the endpoint YAML file. Wildcard paths are supported which means that all matched endpoints will be swapped.'
13 |     required: true
14 | 
15 | runs:
16 |   using: "composite"
17 |   steps:
18 |   - name: Swap endpoint deployments
19 |     id: swap-deployments
20 |     shell: bash
21 |     run: |
22 |       set -e
23 |       az configure --defaults workspace=${{ inputs.workspaceName }} group=${{ inputs.resourceGroup }}
24 | 
25 |       ENDPOINT_FILE=${{ inputs.endpointFile }}
26 |       ENDPOINT_NAME=$(yq -r ".name" $ENDPOINT_FILE)
27 |       echo "ENDPOINT_FILE: $ENDPOINT_FILE"
28 |       echo "ENDPOINT_NAME: $ENDPOINT_NAME"
29 | 
30 |       echo "Reading endpoint traffic to figure out which deployment is staging/production"
31 |       az ml online-endpoint show -n $ENDPOINT_NAME --query "traffic" -o yaml > endpoint_traffic.yml
32 |       echo "Endpoint traffic:"
33 |       cat endpoint_traffic.yml
34 |       GREEN_TRAFFIC=$(yq .green endpoint_traffic.yml)
35 |       BLUE_TRAFFIC=$(yq .blue endpoint_traffic.yml)
36 | 
37 |       if [ $GREEN_TRAFFIC == null ]; then
38 |       if [ $BLUE_TRAFFIC == null ]; then
39 |         echo "::error::No deployment slots available for endpoint $ENDPOINT_NAME. Nothing to swap.";
40 |         exit 1;
41 |       else
42 |         echo "Setting blue traffic to 100%"
43 |         az ml online-endpoint update -n $ENDPOINT_NAME --traffic "blue=100"
44 |       fi
45 |       else
46 |       if [ $BLUE_TRAFFIC == null ]; then
47 |         echo "Setting green traffic to 100%"
48 |         az ml online-endpoint update -n $ENDPOINT_NAME --traffic "green=100"
49 |       else
50 |         if [ $GREEN_TRAFFIC == 0 ]; then
51 |           echo "Setting traffic to: green=100 blue=0"
52 |           az ml online-endpoint update -n $ENDPOINT_NAME --traffic "green=100 blue=0"
53 |         else
54 |           echo "Setting traffic to: green=0 blue=100"
55 |           az ml online-endpoint update -n $ENDPOINT_NAME --traffic "green=0 blue=100"
56 |         fi
57 |       fi
58 |       fi -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.github/actions/aml-endpoint-test/action.yaml: -------------------------------------------------------------------------------- 1 | name: Test AzureML managed online endpoint deployment (0% traffic deployment)
2 | description: 'Finds 0% traffic deployment of an Azure ML endpoint and tests it.'
3 | 
4 | inputs:
5 |   resourceGroup:
6 |     description: 'Name of the resource group where the workspace is placed.'
7 |     required: true
8 |   workspaceName:
9 |     description: 'Name of the workspace to work against.'
10 |     required: true
11 |   endpointFile:
12 |     description: 'Path to the endpoint YAML file. Wildcard paths are supported which means that all matched endpoints will be tested.'
13 |     required: true
14 |   requestFile:
15 |     description: 'Name of the json test request file.'
16 |     required: true
17 | 
18 | runs:
19 |   using: "composite"
20 |   steps:
21 |   - name: Test endpoint deployments
22 |     id: test-deployment
23 |     shell: bash
24 |     run: |
25 |       set -e
26 |       az configure --defaults workspace=${{ inputs.workspaceName }} group=${{ inputs.resourceGroup }}
27 | 
28 |       ENDPOINT_FILE=${{ inputs.endpointFile }}
29 |       ENDPOINT_NAME=$(yq -r ".name" $ENDPOINT_FILE)
30 |       echo "ENDPOINT_FILE: $ENDPOINT_FILE"
31 |       echo "ENDPOINT_NAME: $ENDPOINT_NAME"
32 | 
33 |       echo "Reading endpoint traffic to figure out which deployment is staging/production"
34 |       az ml online-endpoint show -n $ENDPOINT_NAME --query "traffic" -o yaml > endpoint_traffic.yml
35 |       echo "Endpoint traffic:"
36 |       cat endpoint_traffic.yml
37 |       GREEN_TRAFFIC=$(yq .green endpoint_traffic.yml)
38 |       BLUE_TRAFFIC=$(yq .blue endpoint_traffic.yml)
39 |       if [ $GREEN_TRAFFIC == 0 ]; then
40 |         TEST_DEPLOYMENT_NAME='green'
41 |       fi
42 |       if [ $BLUE_TRAFFIC == 0 ]; then
43 |         TEST_DEPLOYMENT_NAME='blue'
44 |       fi
45 |       if [ -z "$TEST_DEPLOYMENT_NAME" ]; then echo "::error::No 0% traffic deployment found to test for endpoint $ENDPOINT_NAME."; exit 1; fi # guard: fail fast instead of invoking with an empty deployment name
46 |       TEST_RESPONSE=$(az ml online-endpoint invoke -n $ENDPOINT_NAME --deployment $TEST_DEPLOYMENT_NAME --request-file ${{ inputs.requestFile }})
47 |       # TODO: test that response is valid, fail with exit 1 if not -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.github/actions/aml-job-create/action.yaml: -------------------------------------------------------------------------------- 1 | name: Submitting job
2 | description: 'Creates and submits a new job to Azure ML based on a job configuration. Jobs are named using the provided job name and a unique run id returned by GitHub.'
3 | 
4 | inputs:
5 |   # name:
6 |   #   description: 'Name of the job to be created. Note that the final name of the job will be the given name followed by the number of the build run `github.run_id`. This value is provided as an output.'
7 |   #   required: true
8 |   jobFile:
9 |     description: 'Path to the job file.'
10 |     required: true
11 |   # workspaceName:
12 |   #   description: 'Name of the workspace to work against.'
13 |   #   required: true
14 |   # resourceGroup:
15 |   #   description: 'Name of the resource group where the workspace is placed.'
16 |   #   required: true
17 |   # noWait:
18 |   #   description: 'Indicates if the action should not wait for the job to finish.'
19 |   #   required: false
20 |   #   default: 'false'
21 | 
22 | # outputs:
23 |   # jobName:
24 |   #   description: Name of the job created in the workspace.
25 |   #   value: ${{ steps.jobRun.outputs.jobName }}
26 | 
27 | runs:
28 |   using: "composite"
29 |   steps:
30 |   - name: Run AML Job
31 |     id: jobRun
32 |     shell: bash
33 |     run: |
34 |       run_id=$(az ml job create -f ${{ inputs.jobFile }} --query name -o tsv)
35 |       if [[ -z "$run_id" ]]
36 |       then
37 |         echo "Job creation failed"
38 |         exit 3
39 |       fi
40 |       az ml job show -n $run_id --web
41 |       status=$(az ml job show -n $run_id --query status -o tsv)
42 |       if [[ -z "$status" ]]
43 |       then
44 |         echo "Status query failed"
45 |         exit 4
46 |       fi
47 |       running=("Queued" "Starting" "Preparing" "Running" "Finalizing")
48 |       while [[ ${running[*]} =~ $status ]]
49 |       do
50 |         sleep 15
51 |         status=$(az ml job show -n $run_id --query status -o tsv)
52 |         echo $status
53 |       done
54 |       if [[ "$status" = "Failed" ]]
55 |       then
56 |         echo "Training Job failed"
57 |         exit 3
58 |       fi
59 | 
-------------------------------------------------------------------------------- /MLOps-ADO-ADB/.github/workflows/workshop_cd.yml: -------------------------------------------------------------------------------- 1 | name: workshop-cd
2 | on:
3 |   workflow_dispatch:
4 |   pull_request:
5 |     types:
6 |     - opened
7 |     branches:
8 |     - main
9 |     paths:
10 |       - src/workshop/core/**
11 |       - .github/workflows/workshop_cd.yml
12 | jobs:
13 |   Workshop-Deployment:
14 |     runs-on: ubuntu-latest
15 |     steps:
16 | 
17 |     - name: Check out repository code
18 |       uses: actions/checkout@v2
19 | 
20 |     - name: Setup python
21 |       uses: actions/setup-python@v2
22 |       with:
23 |         python-version: '3.8'
24 | 
25 |     - name: Upgrade pip
26 |       run: |
27 |         python -m pip install --upgrade pip
28 |         python -m pip install --upgrade build
29 |         python -m pip install --upgrade twine
30 | 
31 |     - name: AZ Login
32 |       uses: azure/login@v1
33 |       with:
34 |         creds: ${{ secrets.AZURE_SERVICE_PRINCIPAL }} #setup: replace AZURE_SERVICE_PRINCIPAL with the name of your Azure credentials secret in GitHub
35 | 
36 |     - name: Install az ml and tools
37 |       run: |
38 |         az extension add -n ml -y --version 2.2.1
39 |         sudo apt install jq
40 |         pip install yq
41 | 
42 |     - name: Run deployment
43 |       uses: ./.github/actions/aml-endpoint-deploy
44 |       with:
45 |         resourceGroup: njs-aia-rg #setup: replace njs-aia-rg with the name of your resource group in Azure
46 |         workspaceName: njs-ws #setup: replace njs-ws with the name of your workspace in Azure
47 |         endpointFile: src/workshop/core/scoring/endpoint.yml
48 |         deploymentFile: src/workshop/core/scoring/deployment.yml
49 |         modelVersion: latest
50 | 
51 |     - name: Test deployment
52 |       uses: ./.github/actions/aml-endpoint-test
53 |       with:
54 |         resourceGroup: njs-aia-rg #setup: replace njs-aia-rg with the name of your resource group in Azure
55 |         workspaceName: njs-ws #setup: replace njs-ws with the name of your workspace in Azure
56 |         endpointFile: src/workshop/core/scoring/endpoint.yml
57 |         requestFile: src/workshop/core/scoring/scoring_test_request.json
58 | 
59 |     - name: Swap deployment
60 |       uses: ./.github/actions/aml-endpoint-swap
61 |       with:
62 |         resourceGroup: njs-aia-rg #setup: replace njs-aia-rg with the name of your resource group in Azure
63 |         workspaceName: njs-ws #setup: replace njs-ws with the name of your workspace in Azure
64 |         endpointFile: src/workshop/core/scoring/endpoint.yml -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.github/workflows/workshop_ci.yml: -------------------------------------------------------------------------------- 1 | name: workshop-ci
2 | on:
3 |   workflow_dispatch:
4 |   pull_request:
5 |     types:
6 |     - closed
7 |     branches:
8
| - integration 9 | paths: 10 | - src/workshop/core/** 11 | - .github/workflows/workshop_ci.yml 12 | jobs: 13 | Workshop-Train-Validation: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Check out repository code 17 | uses: actions/checkout@v3 18 | - name: Setup python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: '3.8' # Version range or exact version of a Python version to use, using SemVer's version range syntax 22 | - name: Upgrade pip 23 | run: | 24 | python -m pip install --upgrade pip 25 | python -m pip install --upgrade build 26 | python -m pip install --upgrade twine 27 | - name: AZ Login 28 | uses: azure/login@v1 29 | with: 30 | creds: ${{ secrets.AZURE_SERVICE_PRINCIPAL }} #setup: provide your Azure credentials name stored in github 31 | 32 | - name: Install az ml & set default values for AML 33 | run: | #setup: provide group, workspace and location 34 | az extension add -n ml -y --version 2.2.1 35 | az configure --defaults group=njs-aia-rg workspace=njs-ws location=eastus 36 | - name: run training and model validation 37 | run: | 38 | az ml job create -s -f src/workshop/core/pipelines/training_pipeline.yml 39 | 40 | - name: Create Pull Request to Main 41 | uses: thomaseizinger/create-pull-request@master 42 | with: 43 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN_GITHUB }} #setup: provide your github secret name 44 | head: ${{ github.ref }} 45 | base: main 46 | title: "An automatically created PR to main by successful CI" 47 | 48 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.github/workflows/workshop_unit_test.yml: -------------------------------------------------------------------------------- 1 | name: feature_engineering_unit_test 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches-ignore: 6 | - main 7 | - integration 8 | paths: 9 | - src/workshop/core/data_engineering/* 10 | - .github/workflows/workshop_unit_test.yml 11 | 12 | jobs: 13 | unit-test: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Check out repository code 17 | uses: actions/checkout@v3 18 | - name: Setup python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: '3.8' # Version range or exact version of a Python version to use, using SemVer's version range syntax 22 | - name: Upgrade pip 23 | run: | 24 | python -m pip install --upgrade pip 25 | python -m pip install --upgrade build 26 | python -m pip install --upgrade twine 27 | - name: AZ Login 28 | uses: azure/login@v1 29 | with: 30 | creds: ${{ secrets.AZURE_SERVICE_PRINCIPAL }} # SETUP: replace AZURE_SERVICE_PRINCIPAL with your own secret name 31 | - name: Install AZ ML and tools 32 | run: | # SETUP line 34 to point to your own AML workspace 33 | az extension add -n ml -y --version 2.2.1 34 | az configure --defaults group=njs-aia-rg workspace=njs-ws location=eastus 35 | - name: Run Feature Engineering 36 | uses: ./.github/actions/aml-job-create 37 | with: 38 | jobFile: src/workshop/core/data_engineering/feature_engineering.yml 39 | 40 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | src/workshop/data/*.parquet 3 | src/workshop/data/*.joblib 4 | *.amlignore 5 | *.amltmp 6 | *.ipynb_aml_checkpoints 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | 
develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ -------------------------------------------------------------------------------- /MLOps-ADO-ADB/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Nick Switanek 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/README.md: -------------------------------------------------------------------------------- 1 | [![Board Status](https://dev.azure.com/mlops-field/c4a73005-3da3-411a-806b-e3fc770a2d0f/b4d02123-12a2-46bc-b717-3862eac0b33f/_apis/work/boardbadge/c154e447-6da6-4e54-9da2-880c03bd8e89)](https://dev.azure.com/mlops-field/c4a73005-3da3-411a-806b-e3fc770a2d0f/_boards/board/t/b4d02123-12a2-46bc-b717-3862eac0b33f/Microsoft.RequirementCategory) 2 | # MLOps with Azure DevOps and Azure Databricks 3 | MLOps-ado-adb is a repo created by Microsoft field personnel (GBB, CSA, MTC) that provides a template to facilitate an introductory workshop on modern MLOps practices, using Azure DevOps for CI/CD pipelines and Azure Databricks for ML asset development and compute. This repo is modeled after the [work](https://github.com/microsoft/MLOpsTemplate/) of Microsoft's West Region CSU, which instead uses GitHub Actions and Azure Machine Learning. 4 | 5 | Here is the link to the workshop materials: 6 | - [MLOps workshop materials](/src/workshop/) 7 | 8 | 9 | ## Contributing 10 | 11 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 12 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 13 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 14 | 15 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 16 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 17 | provided by the bot. You will only need to do this once across all repos using our CLA. 
18 | 19 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 20 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 21 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 22 | 23 | ## Trademarks 24 | 25 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 26 | trademarks or logos is subject to and must follow 27 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 28 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 29 | Any use of third-party trademarks or logos are subject to those third-party's policies. 30 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 
30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). 7 | - **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/README.md: -------------------------------------------------------------------------------- 1 | # MLOps Workshop 2 | 3 | ## Introduction 4 | The MLOps workshop is an instructor-led workshop that provides guidance on an MLOps 5 | implementation in Azure. MLOps is a pattern of practices rather than a technology, and there are various ways of implementing MLOps on Azure. This workshop leverages [Azure Databricks](https://learn.microsoft.com/en-us/azure/databricks/introduction/) 6 | and [Azure DevOps](https://learn.microsoft.com/en-us/azure/devops/user-guide/what-is-azure-devops?view=azure-devops) 7 | to implement a robust set of workflows to support machine learning models in production. For a workshop using Azure Machine Learning and GitHub Actions, see a similar set of materials [here](https://github.com/microsoft/MLOpsTemplate/). 8 | 9 | The core capability deployed in this scenario is a prediction of wine quality using a set of empirical measures. This is based on a [UCI Dataset](https://archive.ics.uci.edu/dataset/186/wine+quality). This is treated as a classification scenario, which occurs frequently for many enterprises. For the purpose of this workshop, the key stages of exploring the data, 10 | engineering predictive features (data engineering) and model building (training, hyperparameter tuning, 11 | algorithm selection, etc.) 
will be assumed to be done and already codified in this [Databricks
12 | notebook](https://learn.microsoft.com/en-us/azure/databricks/mlflow/end-to-end-example).
13 | The core focus of the workshop is then how to refactor this notebook for easier maintenance and iterative development, and how to lay the DevOps foundations for the ML lifecycle, so that the best predictive capabilities are continuously delivered to production even as data science team members experiment with new techniques to improve model performance.
14 | 
15 | ## Audience
16 | - Data scientists
17 | - ML engineers
18 | - ML platform architects and managers
19 | - ... and any other roles that require hands-on experience to support ML models in Azure
20 | 
21 | ## Goals
22 | - Understand key elements of modern MLOps and how it helps improve and accelerate ML practices.
23 | - Design experiments and MLOps pipelines in Azure Databricks.
24 | - Get hands-on experience in building continuous integration and continuous deployment pipelines with Azure DevOps.
25 | 
26 | 
27 | Now, head to [Workshop Environment Setup: Part 0](documents/part_0.md)
28 | 
29 | 
30 | ## Contributing
31 | This project welcomes contributions and suggestions. Most contributions require you to agree to a
32 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
33 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
34 | 
35 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide
36 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
37 | provided by the bot. You will only need to do this once across all repos using our CLA.
38 | 
39 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
40 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
41 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
42 | 
43 | ## Trademarks
44 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
45 | trademarks or logos is subject to and must follow
46 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
47 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
48 | Any use of third-party trademarks or logos are subject to those third-party's policies.
49 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/arm000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/arm000.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/arm001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/arm001.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/arm002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/arm002.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/arm100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/arm100.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/cicd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/cicd.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/cloudshell-accept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/cloudshell-accept.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/cloudshell-firstlaunch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/cloudshell-firstlaunch.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/cloudshell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/cloudshell.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/cloudshell2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/cloudshell2.png -------------------------------------------------------------------------------- 
/MLOps-ADO-ADB/src/workshop/documents/images/image-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/image-10.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/image-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/image-11.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/image-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/image-12.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/image-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/image-13.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/image-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/image-14.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/image-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/image-15.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/image-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/image-16.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/image-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/image-8.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/image-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/image-9.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/monolithic_modular.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/monolithic_modular.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part3cicd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part3cicd.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_adb_add_sp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_adb_add_sp.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_add_sp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_add_sp.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe1.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe2.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe3.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe4.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_ado_pipe5.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_integration_policies.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_integration_policies.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_main_policies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_main_policies.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_0_set_model_permissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_0_set_model_permissions.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_1_adb_create_branch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_1_adb_create_branch.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_1_adb_file_exp_dev.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_1_adb_file_exp_dev.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_1_branch_ui_integration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_1_branch_ui_integration.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_1_db_repo_file_explorer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_1_db_repo_file_explorer.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_1_git_options_from_adb_repo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_1_git_options_from_adb_repo.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_1_model_registry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_1_model_registry.png -------------------------------------------------------------------------------- 
/MLOps-ADO-ADB/src/workshop/documents/images/part_2_aad_login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_2_aad_login.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_2_ado_manual_trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_2_ado_manual_trigger.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_2_azpipe_run_nb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_2_azpipe_run_nb.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_2_pipe_adb_step.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_2_pipe_adb_step.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_2_pipe_job.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_2_pipe_job.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_2_run_job.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_2_run_job.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_3_adb_repo_commit_push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_3_adb_repo_commit_push.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_3_adb_repo_link_in_nb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_3_adb_repo_link_in_nb.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_3_unit_test_triggers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_3_unit_test_triggers.png 
-------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/part_4_adb_training_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/part_4_adb_training_workflow.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli000.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli001.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli002.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli003.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli004.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli005.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli006.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli007.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli007.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli008.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli009.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/run_mlopsworkshop_azcli010.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/training_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/training_pipeline.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/images/video_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/MLOps-ADO-ADB/src/workshop/documents/images/video_img.png -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/part_2.md: -------------------------------------------------------------------------------- 1 | 2 | # Part 2: Preparing notebooks for remote triggering 3 | 4 | ## Pre-requisites 5 | - Complete [Part 0](part_0.md), [Part 1](part_1.md) 6 | - In your Databricks Repo, have a personal dev branch that you created off of the `integration` branch, named `dev-{yourname}` or similar (for example, `dev-nick`). 7 | - Run each notebook successfully via the Databricks notebook UI -- data prep, training, and evaluating. 8 | - Confirm that you have a Model labeled "Production" in the Models section of Databricks. 9 | 10 | ## Summary 11 | After successfully restructuring the end-to-end Databricks notebook into task-focused, modular notebooks, and running those notebooks via the Databricks UI, your team wants to prepare to run the notebooks automatically in response to code changes. 12 | 13 | To do this, we need to move away from untracked, user-driven notebooks to version-controlled notebooks that can be run not by a person but by a Service Principal, which is a type of application with narrowly constrained rights and responsibilities. 
Your MLOps platform administrator should already have created a Service Principal, granted it the appropriate permissions to interact with your Databricks service, and given you permissions to use the Service Principal to run notebooks and workflows in Databricks. 14 | 15 | To introduce these concepts you will next do the following: 16 | - Preview how to run the data prep notebook via REST API 17 | - Preview how to use the Service Principal to run the notebook 18 | - Review the configuration of an Azure Pipeline in Azure DevOps to run a "unit test" notebook using the Service Principal 19 | - Manually trigger the Azure Pipeline from Azure DevOps 20 | 21 | 22 | ## Steps 23 | 1. Many actions you take in Databricks, including running a notebook, can be triggered programmatically via API. Let's unpack an example call to the API to run a notebook: 24 | ![Sample Databricks API to run job](images/part_2_run_job.png) 25 | 26 | In the first line we see `curl -X POST`, which means we're using the command-line utility curl to issue an HTTP POST request to a URL. 27 | At the bottom we see where the request is sent: `$(databricks_workspace_uri)/api/2.1/jobs/run-now`. The `$(databricks_workspace_uri)` part is a variable referring to the URI of your Databricks instance, which corresponds to what you see in the address bar of your browser and is of the form "https://{some string}.azuredatabricks.net/". 28 | 29 | Many variables used in this pipeline were specified in Part 0 during the platform setup, and they live in the Azure DevOps library as a variable group. These can include secured secrets, such as those stored in a linked Azure Key Vault. 30 | 31 | 32 | After that comes the `/api/2.1/jobs/run-now` path, which is how we express the command to run the notebook job. 33 | 34 | 2. Next, in order to run a notebook in Databricks you need the right permissions. This is what the `Authorization: Bearer '"$(token)"'` header is about. Unless we pass the right token along with the API request, the request will be rejected and no action will be taken. From the `$(token)` notation, we see that the token is a variable. How do we get that token? Prior to making the Databricks API request, we request a token for the Service Principal from the Azure AD API. 35 | 36 | ![Azure AD Login API](images/part_2_aad_login.png) 37 | 38 | With this REST call, we are asking Azure AD for an OAuth token for the Service Principal, referred to by its `client_id`. We pass the Service Principal's `client_secret`, as a variable, along with the request; if the request is authorized, we get back a JSON response that includes an `access_token` element, which we parse out with `jq` and store in the variable `token`. That `token` is what `$(token)` references in the call to the Databricks API. For security purposes, this token only lives for an hour, so each time we call the API, we first have the Service Principal authenticate. 39 | 40 | 3. So we now know that running the notebook via the Databricks API requires first authenticating and getting a token that reflects the right permissions. We'd like these two steps to run on a secure machine in a pipeline that can be automated in response to events such as code changes committed to our repo. Azure Pipelines is a platform enabling just such functionality.
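Concretely, the two REST calls we just walked through can be sketched in shell as follows. This is a minimal, illustrative sketch rather than the workshop's exact code: the placeholder values (`TENANT_ID`, `CLIENT_ID`, `CLIENT_SECRET`, `DATABRICKS_URI`, `JOB_ID`) stand in for the variable-group variables the actual pipeline uses.

```
# Illustrative placeholders -- in the workshop these come from the
# Azure DevOps variable group (and its linked Key Vault).
TENANT_ID="<your-azure-ad-tenant-id>"
CLIENT_ID="<service-principal-client-id>"
CLIENT_SECRET="<service-principal-client-secret>"
DATABRICKS_URI="https://<some-string>.azuredatabricks.net"
JOB_ID="123456789012345"   # hypothetical Databricks job ID

# Step 1: ask Azure AD (Entra ID) for an OAuth access token for the Service
# Principal. 2ff814a6-3304-4ab8-85cb-cd0e6f879c1d is the well-known resource
# ID for Azure Databricks.
token=$(curl -s -X POST "https://login.microsoftonline.com/${TENANT_ID}/oauth2/v2.0/token" \
  -d "client_id=${CLIENT_ID}" \
  -d "client_secret=${CLIENT_SECRET}" \
  -d "grant_type=client_credentials" \
  -d "scope=2ff814a6-3304-4ab8-85cb-cd0e6f879c1d/.default" \
  | jq -r '.access_token')

# Step 2: pass the short-lived token as a Bearer token and trigger the job run.
curl -s -X POST "${DATABRICKS_URI}/api/2.1/jobs/run-now" \
  -H "Authorization: Bearer ${token}" \
  -H "Content-Type: application/json" \
  -d "{\"job_id\": ${JOB_ID}}"
```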
41 | 42 | Here is an example Azure Pipeline definition (a minimal YAML sketch of a pipeline like this appears at the end of this part): 43 | 44 | ![Azure Pipeline to run Databricks Notebook](images/part_2_azpipe_run_nb.png) 45 | 46 | In the `steps` section we can see the authentication request (1) followed by the Databricks API request to run a notebook (2). 47 | 48 | These `curl` requests have to run somewhere, and in the `pool` section we see that Azure Pipelines will run them on a virtual machine running the latest Ubuntu Linux image. 49 | 50 | At the top of the pipeline configuration there is a `trigger` section, which is where we'll specify the conditions under which the steps should be executed. We'll configure the trigger section in Part 3 of the workshop. 51 | 52 | 4. For now, let's manually trigger the Azure Pipeline to confirm it does what we expect. In your browser, navigate to your Azure DevOps project at https://dev.azure.com and go to the Pipelines section of the sidebar. 53 | 54 | You should see a pipeline named "Data Prep Unit Test Pipeline." Click on the pipeline name to see a list of prior runs of the pipeline, along with a blue "Run pipeline" button. Use the blue "Run pipeline" button to manually trigger the pipeline. 55 | 56 | Be sure to run the pipeline on your dev branch of the repo: 57 | 58 | ![Azure Pipeline Job](images/part_2_ado_manual_trigger.png) 59 | 60 | 5. Let's review what happens next. Click on the "Job" link to open up the Azure Pipeline job you just ran. 61 | ![Azure Pipeline Job](images/part_2_pipe_job.png) 62 | 63 | You'll see a long list of steps that have run on the Linux VM in Azure Pipelines. All steps in the Azure Pipeline should show a green checkmark as having successfully completed. Many of the steps are utility steps, but they also include the two steps we explicitly defined in the pipeline YAML: the authorization step, labeled "Get Entra ID token" (Entra ID is the new name for Azure AD), and the run-notebook step, labeled "Run Databricks notebook via API". Click on "Run Databricks notebook via API". 64 | ![Azure Pipeline Job - Databricks API step](images/part_2_pipe_adb_step.png) 65 | 66 | 6. Finally, let's confirm that the command we issued to our Databricks workspace from the Azure Pipeline actually triggered the Databricks notebook to run. In your browser, return to Azure Databricks and navigate to Workflows > Job runs. You should see a job named "Data Prep Pipeline Run - {your branch name}" that was run as your Service Principal. 67 | 68 | 69 | ## Success criteria 70 | - Basic understanding of how to call the Databricks API to run a notebook. 71 | - Basic understanding that the Service Principal can execute the API call if it is authenticated and has the right permissions. 72 | - Basic understanding of how an Azure Pipeline can be configured to automate Service Principal authentication followed by a Databricks notebook run via the Databricks API. 73 | - Review the Azure Pipeline run. 74 | - Confirm that the Azure Pipeline ran a notebook job in Azure Databricks.
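For reference, here is a minimal sketch of what such a pipeline definition can look like in YAML. It is illustrative, not the workshop's exact file: the variable group name `databricks-vars` and the individual variable names are assumptions, while the two `displayName` values match the step labels you saw in the run above.

```
trigger: none   # manual runs for now; Part 3 wires up real triggers

pool:
  vmImage: ubuntu-latest

variables:
  - group: databricks-vars   # hypothetical variable group name

steps:
  # (1) Authenticate the Service Principal against Azure AD / Entra ID
  - script: |
      token=$(curl -s -X POST "https://login.microsoftonline.com/$(tenant_id)/oauth2/v2.0/token" \
        -d "client_id=$(client_id)" \
        -d "client_secret=$(client_secret)" \
        -d "grant_type=client_credentials" \
        -d "scope=2ff814a6-3304-4ab8-85cb-cd0e6f879c1d/.default" | jq -r '.access_token')
      # expose the token to later steps as a secret pipeline variable
      echo "##vso[task.setvariable variable=token;issecret=true]$token"
    displayName: Get Entra ID token

  # (2) Call the Databricks API with the token to run the notebook job
  - script: |
      curl -s -X POST "$(databricks_workspace_uri)/api/2.1/jobs/run-now" \
        -H "Authorization: Bearer $(token)" \
        -H "Content-Type: application/json" \
        -d "{\"job_id\": $(job_id)}"
    displayName: Run Databricks notebook via API
```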
75 | 76 | 77 | ## [Go to Part 3](part_3.md) 78 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/part_5.md: -------------------------------------------------------------------------------- 1 | # Part 5: Continuous Delivery (CD) 2 | 3 | ## Pre-requisites 4 | - Complete [Part 0](part_0.md), [Part 1](part_1.md), [Part 2](part_2.md), [Part 3](part_3.md) and [Part 4](part_4.md) 5 | 6 | ## Summary 7 | 8 | After a successful run of the CI (continuous integration) pipeline, your team is looking to complete the process with a CD (continuous delivery, or continuous deployment) pipeline. The CD pipeline will deploy the new, better-performing model while keeping the model continuously available to the processes that depend on it, without introducing any downtime in production, a technique also known as a "hot swap". 9 | 10 | The goal of this section is to get a fully functional CD pipeline running that will: 11 | 12 | 1. Authenticate using a Service Principal so the workflow can run Azure Databricks commands. 13 | 2. Be triggered automatically when a Pull Request (PR) is approved that merges new code, which has passed the integration tests in the `integration` branch, into the `main` branch of the repo. 14 | 3. Promote the new model to production and archive the old model, if the new model's performance metrics show improvement over the current production model on production data. 15 | 16 | ## Steps 17 | 18 | 1. As you have done since Part 3, you define triggers as part of an Azure Pipelines workflow. The CD workflow is triggered when a pull request is created and the new code in `integration` is merged into the `main` branch. The PR to `main` is opened if the new code results in a model that outperforms the prior model on test data. The triggers for this workshop have already been defined in `.azure_pipelines/cd.yml`. 19 | 20 | The key elements of the trigger section are as follows: 21 | 22 | ``` 23 | # .azure_pipelines/cd.yml 24 | 25 | trigger: 26 | branches: 27 | exclude: 28 | - integration 29 | include: 30 | - main 31 | paths: 32 | include: 33 | - src/workshop/notebooks/part_1_1_data_prep.ipynb 34 | - src/workshop/notebooks/part_1_2_training.ipynb 35 | - src/workshop/notebooks/part_1_3_evaluating.ipynb 36 | - .azure_pipelines/cd.yml 37 | 38 | ``` 39 | 40 | 2. The CD workflow relies on the Azure CLI to control the infrastructure and implement the automation of the model deployments. Therefore, we need to set up this workflow to log in to Azure via a Service Principal so that it can use the Azure CLI. 41 | 42 | > Action Items: 43 | > 1. Open up the `cd.yml` file in your Azure repo under `.azure_pipelines/`. 44 | > 2. Update the 'creds: ${{ secrets...' section in this file to set up your secret name. Follow the instructions in this file annotated with #setup. 45 | 46 | > Note: Please refer to [Use the Azure login action with a service principal secret](https://docs.microsoft.com/en-us/azure/developer/github/connect-from-azure?tabs=azure-portal%2Cwindows#use-the-azure-login-action-with-a-service-principal-secret) to create the proper Azure Credentials if you haven't done so already (you should have already defined such a secret to complete the CI part of the workshop, i.e. [Part 4](part_4.md)). 47 | 48 | 3. In our scenario, a model is deployed to production when it occupies the "Production" model slot in the model registry.
Our CD pipeline needs to ensure that the current best model is always available in the "Production" slot. The Azure Pipeline we specify for CD automates these deployments. 49 | 50 | Now let's edit the Azure Pipelines configuration file that controls the CD process, located at `.azure_pipelines/cd.yml`. 51 | 52 | > Action Item: 53 | >- Edit `cd.yml` to set up your Azure resource group name and Azure ML workspace name, which are passed as parameters to a set of pipeline templates. Look for #setup and follow the instructions in the file. 54 | 55 | > Action Items: 56 | > 1. Commit your configuration changes and push them up to the Azure Repo in your own development branch. 57 | > 2. Go to the Azure Pipelines UI, select the pipeline you configured in 'cd.yml', and trigger it to run now on your own branch. 58 | > 3. Once triggered, click on it to open up the details and monitor its execution. 59 | 60 | 61 | 4. (optional) Test the new deployment using `/notebooks/part_1_4_scoring`. 62 | 63 | ## Success criteria 64 | 65 | - The CD pipeline runs successfully each time a PR to 'main' is merged. Please test this by creating your own PR to main. 66 | - Check that the new, better model is deployed to the "Production" slot in your model registry, the Models section of Azure Databricks. 67 | 68 | 69 | ## Congratulations! 70 | This completes the workshop. You have gained hands-on experience with many of the key concepts involved in MLOps using Azure Databricks and Azure DevOps. -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/documents/part_tips.md: -------------------------------------------------------------------------------- 1 | # Pre-Workshop Checklist 2 | > Note: Review the following criteria to ensure you can complete the workshop. These are critical pieces of access to get right for a successful workshop experience. 3 | 4 | ## Azure 5 | 1. Do you have an Azure account? 6 | 7 | 2. Do you have a `Contributor` role for your Azure Subscription? 8 | - If you don't, do you have a `Contributor` role for the Azure Resource Group? 9 | > Note: If you don't, you can't run the workshop. 10 | 11 | 3. Do you have a Service Principal? 12 | - If you do, do you know its information (client ID, secret)? 13 | - If you don't, can you ask your Cloud team to create a Service Principal scoped to a single resource group? 14 | > Note: If you don't, you can't run the workshop. 15 | 16 | 4. Do you know who can help you handle issues? 17 | 18 | 5. Do you know a person from your Cloud infra/security team who can help you: 19 | - Create Azure resources 20 | - Grant permissions 21 | 22 | 6. Did you register 'Microsoft.MachineLearningServices' for your Azure subscription? 23 | > Note: If you're not sure, go to the Azure Portal > Subscriptions > 'YourSubscription' > Resource providers > Search 'Microsoft.MachineLearningServices'. You can also check from the command line, as shown in the sketch at the end of this checklist. 24 | 25 | ![ml_services](./images/arm100.png) 26 | 27 | ## GitHub 28 | 1. Do you have a GitHub account? 29 | > Note: If not, create a new account and follow the instructions in Part 0 of the workshop.
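For checklist item 6, you can also check and register the resource provider from the command line. Here is a minimal Azure CLI sketch; it assumes you have already run `az login` and selected the right subscription:

```
# Check the registration state of the Machine Learning resource provider
az provider show --namespace Microsoft.MachineLearningServices --query registrationState -o tsv

# Register it if the state is not "Registered"
az provider register --namespace Microsoft.MachineLearningServices
```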
30 | 31 | # [Go to Part 0](./part_0.md) 32 | -------------------------------------------------------------------------------- /MLOps-ADO-ADB/src/workshop/notebooks/part_1_4_scoring.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "application/vnd.databricks.v1+cell": { 7 | "cellMetadata": {}, 8 | "inputWidgets": {}, 9 | "nuid": "42da3deb-616c-47da-ba93-a9259704ce36", 10 | "showTitle": false, 11 | "title": "" 12 | } 13 | }, 14 | "source": [ 15 | "## Batch inference\n", 16 | "\n", 17 | "There are many scenarios where you might want to evaluate a model on a corpus of new data. For example, you may have a fresh batch of data, or may need to compare the performance of two models on the same corpus of data.\n", 18 | "\n", 19 | "The following code evaluates the model on data stored in a Delta table, using Spark to run the computation in parallel." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 0, 25 | "metadata": { 26 | "application/vnd.databricks.v1+cell": { 27 | "cellMetadata": {}, 28 | "inputWidgets": {}, 29 | "nuid": "44a3d2ba-1285-460e-be14-a060c06de364", 30 | "showTitle": false, 31 | "title": "" 32 | } 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# To simulate a new corpus of data, save the existing X_train data to a Delta table. \n", 37 | "# In the real world, this would be a new batch of data.\n", 38 | "spark_df = spark.createDataFrame(X_train)\n", 39 | "table_path = \"dbfs:/tutorials/wine-data/delta\"\n", 40 | "\n", 41 | "# Delete the contents of this path in case this cell has already been run\n", 42 | "dbutils.fs.rm(table_path, True)\n", 43 | "spark_df.write.format(\"delta\").save(table_path)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": { 49 | "application/vnd.databricks.v1+cell": { 50 | "cellMetadata": {}, 51 | "inputWidgets": {}, 52 | "nuid": "a033adeb-5af8-403e-8c68-75dd829caedf", 53 | "showTitle": false, 54 | "title": "" 55 | } 56 | }, 57 | "source": [ 58 | "Load the model into a Spark UDF, so it can be applied to the Delta table." 
59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 0, 64 | "metadata": { 65 | "application/vnd.databricks.v1+cell": { 66 | "cellMetadata": {}, 67 | "inputWidgets": {}, 68 | "nuid": "d5a7251e-ef02-4eb3-8c51-52ed6faa8abf", 69 | "showTitle": false, 70 | "title": "" 71 | } 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "import mlflow.pyfunc\n", 76 | "\n", 77 | "apply_model_udf = mlflow.pyfunc.spark_udf(spark, f\"models:/{model_name}/production\")" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 0, 83 | "metadata": { 84 | "application/vnd.databricks.v1+cell": { 85 | "cellMetadata": {}, 86 | "inputWidgets": {}, 87 | "nuid": "7beb5d22-6dcd-4a6c-80fb-6ca88808ccfb", 88 | "showTitle": false, 89 | "title": "" 90 | } 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "# Read the \"new data\" from Delta\n", 95 | "new_data = spark.read.format(\"delta\").load(table_path)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 0, 101 | "metadata": { 102 | "application/vnd.databricks.v1+cell": { 103 | "cellMetadata": {}, 104 | "inputWidgets": {}, 105 | "nuid": "39e22812-2516-48e6-9735-21bff0fcbf29", 106 | "showTitle": false, 107 | "title": "" 108 | } 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "display(new_data)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 0, 118 | "metadata": { 119 | "application/vnd.databricks.v1+cell": { 120 | "cellMetadata": {}, 121 | "inputWidgets": {}, 122 | "nuid": "8087a6e1-511d-4c5b-895c-34539ed004a2", 123 | "showTitle": false, 124 | "title": "" 125 | } 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "from pyspark.sql.functions import struct\n", 130 | "\n", 131 | "# Apply the model to the new data\n", 132 | "udf_inputs = struct(*(X_train.columns.tolist()))\n", 133 | "\n", 134 | "new_data = new_data.withColumn(\n", 135 | " \"prediction\",\n", 136 | " apply_model_udf(udf_inputs)\n", 137 | ")" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 0, 143 | "metadata": { 144 | "application/vnd.databricks.v1+cell": { 145 | "cellMetadata": {}, 146 | "inputWidgets": {}, 147 | "nuid": "c451842f-0280-4900-9e6f-4bd425e8c139", 148 | "showTitle": false, 149 | "title": "" 150 | } 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "# Each row now has an associated prediction. Note that the xgboost function does not output probabilities by default, so the predictions are not limited to the range [0, 1].\n", 155 | "display(new_data)" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "application/vnd.databricks.v1+notebook": { 161 | "dashboards": [], 162 | "language": "python", 163 | "notebookMetadata": { 164 | "pythonIndentUnit": 4 165 | }, 166 | "notebookName": "part_1_4_scoring", 167 | "widgets": {} 168 | }, 169 | "language_info": { 170 | "name": "python" 171 | }, 172 | "orig_nbformat": 4 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 0 176 | } 177 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLOps Template 2 | MLOps Template is a repo created by Microsoft field personnel (CSA, GBB, MTC) that provides several tools and templates to facilitate modern MLOps practices. 
3 | 4 | In addition to a template for an Active Learning implementation, there are also two sets of materials to facilitate an introductory workshop on modern MLOps practices: one, developed by the West Region CSU, uses Azure Machine Learning and GitHub Actions; the other features Azure Databricks for ML asset development and Azure DevOps for CI/CD pipelines. 5 | 6 | - [Active Learning template](/src/active_learning_cv/) 7 | - [MLOps workshop materials using Azure Machine Learning and GitHub Actions](/src/workshop/) 8 | - [MLOps workshop materials using Azure Databricks and Azure DevOps](MLOps-ADO-ADB/src/workshop/) 9 | 10 | 11 | ## Contributing 12 | 13 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 14 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 15 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 16 | 17 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 18 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 19 | provided by the bot. You will only need to do this once across all repos using our CLA. 20 | 21 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 22 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 23 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 24 | 25 | ## Trademarks 26 | 27 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 28 | trademarks or logos is subject to and must follow 29 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 30 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 31 | Any use of third-party trademarks or logos is subject to those third parties' policies. 32 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). 7 | - **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 
26 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/__init__.py -------------------------------------------------------------------------------- /src/active_learning_cv/README.md: -------------------------------------------------------------------------------- 1 | 2 | # [Please access the repo of Active Learning Here](https://github.com/microsoft/MLOpsTemplate/tree/james-simdev/src/active_learning_cv) 3 | -------------------------------------------------------------------------------- /src/active_learning_cv/data/images/comparision_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/active_learning_cv/data/images/comparision_table.png -------------------------------------------------------------------------------- /src/active_learning_cv/data/images/functional_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/active_learning_cv/data/images/functional_flow.png -------------------------------------------------------------------------------- /src/active_learning_cv/data/images/ls_rs_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/active_learning_cv/data/images/ls_rs_13.png -------------------------------------------------------------------------------- /src/active_learning_cv/data/images/ls_rs_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/active_learning_cv/data/images/ls_rs_14.png -------------------------------------------------------------------------------- /src/active_learning_cv/data/images/ls_rs_es_smu_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/active_learning_cv/data/images/ls_rs_es_smu_13.png -------------------------------------------------------------------------------- /src/active_learning_cv/data/images/technical_design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/active_learning_cv/data/images/technical_design.png -------------------------------------------------------------------------------- /src/workshop/conda-local.yml: -------------------------------------------------------------------------------- 1 | name: mlops-workshop-local 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - pip=21.3.1 7 | - pip: 8 | - azureml-sdk==1.38.0 9 | - azureml-mlflow==1.38.0 10 | - azureml-opendatasets==1.38.0 11 | - pandas==1.3.5 12 | - scikit-learn==1.0.2 -------------------------------------------------------------------------------- /src/workshop/core/data_engineering/conda_feature_engineering.yml: -------------------------------------------------------------------------------- 1 | 
name: data-engineering 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - pip=21.3.1 7 | - pip: 8 | - azureml-mlflow==1.38.0 9 | - azureml-opendatasets==1.38.0 10 | - pandas==1.3.5 11 | - scikit-learn==1.0.2 -------------------------------------------------------------------------------- /src/workshop/core/data_engineering/feature_engineering.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from datetime import datetime 4 | from dateutil.relativedelta import relativedelta 5 | import argparse 6 | import sys 7 | import os 8 | from sklearn.model_selection import train_test_split 9 | sys.path.append(os.path.join(os.path.dirname(__file__),'../../')) 10 | def parse_args(): 11 | # setup arg parser 12 | parser = argparse.ArgumentParser() 13 | 14 | 15 | # add arguments 16 | parser.add_argument("--nyc_file_name", type=str, default="green_taxi.parquet") 17 | parser.add_argument("--public_holiday_file_name", type=str, default="holidays.parquet") 18 | parser.add_argument("--weather_file_name", type=str, default="weather.parquet") 19 | parser.add_argument("--prep_data", type=str,default="data", help="Path of prepped data") 20 | parser.add_argument("--input_folder", type=str, default="data") 21 | parser.add_argument("--run_mode", type=str, default="local") 22 | 23 | # parse args 24 | args = parser.parse_args() 25 | 26 | # return args 27 | return args 28 | 29 | 30 | def build_time_features(vector): 31 | pickup_datetime = vector[0] 32 | month_num = pickup_datetime.month 33 | day_of_month = pickup_datetime.day 34 | day_of_week = pickup_datetime.weekday() 35 | hour_of_day = pickup_datetime.hour 36 | country_code = "US" 37 | hr_sin = np.sin(hour_of_day*(2.*np.pi/24)) 38 | hr_cos = np.cos(hour_of_day*(2.*np.pi/24)) 39 | dy_sin = np.sin(day_of_week*(2.*np.pi/7)) 40 | dy_cos = np.cos(day_of_week*(2.*np.pi/7)) 41 | 42 | return pd.Series((month_num, day_of_month, day_of_week, hour_of_day, country_code, hr_sin, hr_cos, dy_sin, dy_cos)) 43 | 44 | def main(args): 45 | 46 | # read in data 47 | 48 | green_taxi_df = pd.read_parquet(os.path.join(args.input_folder, args.nyc_file_name)) 49 | 50 | green_taxi_df[["month_num", "day_of_month","day_of_week", "hour_of_day", "country_code", "hr_sin", "hr_cos", "dy_sin", "dy_cos"]] = \ 51 | green_taxi_df[["lpepPickupDatetime"]].apply(build_time_features, axis=1) 52 | 53 | columns_to_remove = ["lpepDropoffDatetime", "puLocationId", "doLocationId", "extra", "mtaTax", 54 | "improvementSurcharge", "tollsAmount", "ehailFee", "tripType", "rateCodeID", 55 | "storeAndFwdFlag", "paymentType", "fareAmount", "tipAmount"] 56 | 57 | green_taxi_df.drop(columns_to_remove, axis=1, inplace=True) 58 | 59 | 60 | green_taxi_df["datetime"] = green_taxi_df["lpepPickupDatetime"].dt.normalize() 61 | 62 | 63 | holidays_df = pd.read_parquet(os.path.join(args.input_folder, args.public_holiday_file_name)) 64 | 65 | holidays_df = holidays_df.rename(columns={"countryRegionCode": "country_code"}) 66 | holidays_df["datetime"] = holidays_df["date"].dt.normalize() 67 | 68 | holidays_df.drop(["countryOrRegion", "holidayName", "date"], axis=1, inplace=True) 69 | 70 | taxi_holidays_df = pd.merge(green_taxi_df, holidays_df, how="left", on=["datetime", "country_code"]) 71 | taxi_holidays_df[taxi_holidays_df["normalizeHolidayName"].notnull()] 72 | 73 | 74 | weather_df = pd.read_parquet(os.path.join(args.input_folder,args.weather_file_name)) 75 | 76 | weather_df["datetime"] = weather_df["datetime"].dt.normalize() 
77 | 78 | # group by datetime 79 | aggregations = {"precipTime": "max", "temperature": "mean", "precipDepth": "max"} 80 | weather_df_grouped = weather_df.groupby("datetime").agg(aggregations) 81 | weather_df_grouped.head(10) 82 | 83 | taxi_holidays_weather_df = pd.merge(taxi_holidays_df, weather_df_grouped, how="left", on=["datetime"]) 84 | taxi_holidays_weather_df.describe() 85 | 86 | final_df = taxi_holidays_weather_df.query("pickupLatitude>=40.53 and pickupLatitude<=40.88 and \ 87 | pickupLongitude>=-74.09 and pickupLongitude<=-73.72 and \ 88 | tripDistance>0 and tripDistance<75 and \ 89 | passengerCount>0 and passengerCount<100 and \ 90 | totalAmount>0") 91 | final_df, test_df = train_test_split(final_df, test_size=0.2, random_state=100) 92 | os.makedirs(args.prep_data, exist_ok=True) 93 | 94 | if args.run_mode =='local': 95 | print("Data Files were written successfully to folder:", args.prep_data) 96 | 97 | if args.run_mode =='remote': 98 | print("Data Files were written successfully to AZML Default Data Store folder") 99 | 100 | final_df.to_parquet(os.path.join(args.prep_data, "final_df.parquet")) 101 | test_df.to_parquet(os.path.join(args.prep_data, "test_df.parquet")) 102 | 103 | 104 | # run script 105 | if __name__ == "__main__": 106 | # parse args 107 | args = parse_args() 108 | 109 | # run main function 110 | main(args) 111 | -------------------------------------------------------------------------------- /src/workshop/core/data_engineering/feature_engineering.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json 2 | code: ./ 3 | command: >- 4 | python feature_engineering.py 5 | --input_folder ${{inputs.input_folder}} 6 | --prep_data ${{outputs.prep_data}} 7 | --run_mode ${{inputs.run_mode}} 8 | 9 | inputs: 10 | input_folder: 11 | type: uri_folder 12 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/ 13 | run_mode: "remote" 14 | 15 | outputs: 16 | prep_data: 17 | type: uri_folder 18 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/ 19 | 20 | 21 | environment: 22 | conda_file: ./conda_feature_engineering.yml 23 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest 24 | 25 | compute: azureml:cpu-cluster 26 | display_name: feature-engineering 27 | experiment_name: feature-engineering 28 | description: feature engineering 29 | -------------------------------------------------------------------------------- /src/workshop/core/evaluating/conda_ml_evaluating.yml: -------------------------------------------------------------------------------- 1 | name: evaluating 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - pip=21.3.1 7 | - pip: 8 | - azureml-sdk==1.38.0 9 | - azureml-mlflow==1.38.0 10 | - pandas==1.3.5 11 | - scikit-learn==1.0.2 -------------------------------------------------------------------------------- /src/workshop/core/evaluating/ml_evaluating.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | import argparse 5 | from azureml.core import Run, Dataset,Datastore, Workspace 6 | from sklearn.linear_model import LinearRegression 7 | from sklearn.ensemble import RandomForestRegressor 8 | from sklearn.model_selection import train_test_split 9 | from sklearn.pipeline import Pipeline 10 | from sklearn.preprocessing import OneHotEncoder 11 | from sklearn.impute import SimpleImputer 12 | from 
sklearn.compose import ColumnTransformer 13 | from sklearn.metrics import r2_score, mean_absolute_percentage_error, mean_squared_error 14 | import joblib 15 | from azureml.core import Model 16 | import mlflow 17 | def parse_args(): 18 | # setup arg parser 19 | parser = argparse.ArgumentParser() 20 | 21 | parser.add_argument("--input_file_name", type=str, default="test_df.parquet") 22 | parser.add_argument("--prep_data", default="data", type=str, help="Path to prepped data") 23 | parser.add_argument("--model_folder", default="data", type=str, help="Path to model data") 24 | parser.add_argument("--model_name",default='nyc_fare_prediction',type=str, help="Name of the model in workspace") 25 | parser.add_argument("--run_mode", type=str, default="local") 26 | 27 | 28 | # parse args 29 | args = parser.parse_args() 30 | 31 | # return args 32 | return args 33 | 34 | 35 | 36 | def main(args): 37 | if args.run_mode =='remote': 38 | run = Run.get_context() 39 | ws = run.experiment.workspace 40 | run_id = run.id 41 | 42 | # read in data 43 | test_df = pd.read_parquet(os.path.join(args.prep_data,args.input_file_name)) 44 | 45 | catg_cols = ["vendorID", "month_num", "day_of_month", "normalizeHolidayName", "isPaidTimeOff"] 46 | # num_cols = ["passengerCount", "tripDistance", "precipTime", "temperature", "precipDepth", "hr_sin", "hr_cos", "dy_sin", "dy_cos"] 47 | label = ["totalAmount"] 48 | # make sure categorical columns are strings 49 | test_df[catg_cols] = test_df[catg_cols].astype("str") 50 | 51 | # split data 52 | y_test = test_df[label] 53 | X_test = test_df.drop(label, axis=1) 54 | 55 | # load model' 56 | 57 | if args.run_mode =='local': 58 | model_file = "linear_regression.joblib" 59 | model_path=os.path.join(args.model_folder,model_file) 60 | current_model = joblib.load(model_path) 61 | y_pred_current = current_model.predict(X_test) 62 | r2 = r2_score(y_test, y_pred_current) 63 | mape = mean_absolute_percentage_error(y_test, y_pred_current) 64 | rmse = np.sqrt(mean_squared_error(y_test, y_pred_current)) 65 | print("Evaluation finished! 
Metrics:") 66 | print(f"R2:", r2) 67 | print(f"MAPE:", mape) 68 | print(f"RMSE:", rmse) 69 | 70 | if args.run_mode =='remote': 71 | 72 | for model_file in os.listdir(args.model_folder): 73 | if ".joblib" in model_file: 74 | candidate_model_file=model_file 75 | candidate_model_path=os.path.join(args.model_folder,candidate_model_file) 76 | candidate_model = joblib.load(candidate_model_path) 77 | 78 | y_pred_candidate = candidate_model.predict(X_test) 79 | r2_candidate = r2_score(y_test, y_pred_candidate) 80 | mape_candidate = mean_absolute_percentage_error(y_test, y_pred_candidate) 81 | rmse_candidate = np.sqrt(mean_squared_error(y_test, y_pred_candidate)) 82 | mlflow.log_metric("mape_candidate",mape_candidate) 83 | mlflow.log_metric("r2_candidate",r2_candidate) 84 | mlflow.log_metric("rmse_candidate",rmse_candidate) 85 | 86 | current_model=None 87 | 88 | try: 89 | current_model_aml = Model(ws,args.model_name) 90 | os.makedirs("current_model", exist_ok=True) 91 | current_model_aml.download("current_model",exist_ok=True) 92 | current_model = mlflow.sklearn.load_model(os.path.join("current_model",args.model_name)) 93 | except: 94 | print("Model does not exist") 95 | 96 | if current_model: #current model exist, perform evaluation 97 | # test 2 algorithms 98 | y_pred_current = current_model.predict(X_test) 99 | r2_current = r2_score(y_test, y_pred_current) 100 | mape_current = mean_absolute_percentage_error(y_test, y_pred_current) 101 | rmse_current = np.sqrt(mean_squared_error(y_test, y_pred_current)) 102 | mlflow.log_metric("mape_current",mape_current) 103 | mlflow.log_metric("r2_current",r2_current) 104 | mlflow.log_metric("rmse_current",rmse_current) 105 | if r2_candidate >= r2_current: 106 | print("better model found, registering") 107 | mlflow.sklearn.log_model(candidate_model,args.model_name) 108 | model_uri = f'runs:/{run_id}/{args.model_name}' 109 | mlflow.register_model(model_uri,args.model_name) 110 | 111 | else: 112 | raise Exception("candidate model does not perform better, exiting") 113 | 114 | else: 115 | print("First time model train, registering") 116 | mlflow.sklearn.log_model(candidate_model,args.model_name) 117 | model_uri = f'runs:/{run_id}/{args.model_name}' 118 | mlflow.register_model(model_uri,args.model_name) 119 | 120 | # run script 121 | if __name__ == "__main__": 122 | # parse args 123 | args = parse_args() 124 | 125 | # run main function 126 | main(args) 127 | -------------------------------------------------------------------------------- /src/workshop/core/evaluating/ml_evaluating.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json 2 | code: ./ 3 | command: >- 4 | python ml_evaluating.py 5 | --prep_data ${{inputs.prep_data}} 6 | --model_folder ${{inputs.model_folder}} 7 | --run_mode ${{inputs.run_mode}} 8 | 9 | inputs: 10 | prep_data: 11 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/ 12 | model_folder: 13 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/ 14 | run_mode: "remote" 15 | 16 | environment: 17 | conda_file: ./conda_ml_evaluating.yml 18 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest 19 | 20 | compute: azureml:cpu-cluster 21 | display_name: ml-evaluation 22 | experiment_name: ml-evaluation 23 | description: ml-evaluation 24 | -------------------------------------------------------------------------------- /src/workshop/core/pipelines/adf/adf_pipeline.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "Azure_SQL_ML_Pipeline", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "copy data from sql", 7 | "type": "Copy", 8 | "dependsOn": [], 9 | "policy": { 10 | "timeout": "0.12:00:00", 11 | "retry": 0, 12 | "retryIntervalInSeconds": 30, 13 | "secureOutput": false, 14 | "secureInput": false 15 | }, 16 | "userProperties": [], 17 | "typeProperties": { 18 | "source": { 19 | "type": "AzureSqlSource", 20 | "sqlReaderQuery": { 21 | "value": "@concat('select * from green_taxi WHERE lpepPickupDatetime >','''',formatDateTime(adddays(utcnow(),-3190), 'yyyy-MM-dd'),'''')\n", 22 | "type": "Expression" 23 | }, 24 | "queryTimeout": "02:00:00", 25 | "partitionOption": "None" 26 | }, 27 | "sink": { 28 | "type": "ParquetSink", 29 | "storeSettings": { 30 | "type": "AzureBlobStorageWriteSettings" 31 | }, 32 | "formatSettings": { 33 | "type": "ParquetWriteSettings" 34 | } 35 | }, 36 | "enableStaging": false, 37 | "translator": { 38 | "type": "TabularTranslator", 39 | "typeConversion": true, 40 | "typeConversionSettings": { 41 | "allowDataTruncation": true, 42 | "treatBooleanAsNumber": false 43 | } 44 | } 45 | }, 46 | "inputs": [ 47 | { 48 | "referenceName": "AzureSqlDemo", 49 | "type": "DatasetReference" 50 | } 51 | ], 52 | "outputs": [ 53 | { 54 | "referenceName": "parquetdata", 55 | "type": "DatasetReference" 56 | } 57 | ] 58 | }, 59 | { 60 | "name": "Machine Learning Execute Pipeline", 61 | "type": "AzureMLExecutePipeline", 62 | "dependsOn": [ 63 | { 64 | "activity": "copy data from sql", 65 | "dependencyConditions": [ 66 | "Succeeded" 67 | ] 68 | } 69 | ], 70 | "policy": { 71 | "timeout": "0.12:00:00", 72 | "retry": 0, 73 | "retryIntervalInSeconds": 30, 74 | "secureOutput": false, 75 | "secureInput": false 76 | }, 77 | "userProperties": [], 78 | "typeProperties": { 79 | "mlPipelineEndpointId": "3337b14a-4a0a-47d3-817b-e88e1e7c68e6" 80 | }, 81 | "linkedServiceName": { 82 | "referenceName": "amlws01ent", 83 | "type": "LinkedServiceReference" 84 | } 85 | } 86 | ], 87 | "annotations": [], 88 | "lastPublishTime": "2022-10-05T21:24:10Z" 89 | }, 90 | "type": "Microsoft.DataFactory/factories/pipelines" 91 | } -------------------------------------------------------------------------------- /src/workshop/core/pipelines/batch_scoring_pipeline.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json 2 | type: pipeline 3 | display_name: MLOps-Batch-Scoring-Pipeline 4 | compute: azureml:cpu-cluster 5 | settings: 6 | force_rerun: true 7 | jobs: 8 | data_engineering: 9 | type: command 10 | component: ./data_engineering_comp.yml 11 | inputs: 12 | input_folder: 13 | type: uri_folder 14 | mode: ro_mount 15 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/batch_scoring/inputs 16 | outputs: 17 | output_folder: 18 | type: mltable 19 | mode: rw_mount 20 | scoring: 21 | type: parallel 22 | mini_batch_size: "1" 23 | mini_batch_error_threshold: -1 24 | max_concurrency_per_instance: 2 25 | retry_settings: 26 | max_retries: 1 27 | timeout: 60 28 | resources: 29 | instance_count: 2 30 | inputs: 31 | scoring_data_folder: 32 | type: mltable 33 | mode: eval_mount 34 | path: ${{parent.jobs.data_engineering.outputs.output_folder}} 35 | outputs: 36 | predictions_data_folder: 37 | type: uri_folder 38 | mode: rw_mount 39 | path: 
azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/batch_scoring/predictions 40 | prediction_log: 41 | type: uri_file 42 | mode: rw_mount 43 | input_data: ${{inputs.scoring_data_folder}} 44 | task: 45 | type: function 46 | code: ../scoring/batch_scoring 47 | entry_script: batch_score.py 48 | environment: 49 | name: mlops_batchscoring 50 | conda_file: ../scoring/conda.yml 51 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest 52 | program_arguments: --predictions_data_folder ${{outputs.predictions_data_folder}} 53 | append_row_to: ${{outputs.prediction_log}} 54 | 55 | -------------------------------------------------------------------------------- /src/workshop/core/pipelines/data_engineering_comp.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: data_engineering 3 | display_name: data engineering 4 | version: 10 5 | type: command 6 | code: ../scoring/batch_scoring 7 | command: >- 8 | python data_engineering.py --input_folder ${{inputs.input_folder}} --output_folder ${{outputs.output_folder}}; 9 | inputs: 10 | input_folder: 11 | type: uri_folder 12 | outputs: 13 | output_folder: 14 | type: mltable 15 | is_deterministic: false 16 | environment: 17 | name: mlops_batchscoring 18 | conda_file: ../scoring/conda.yml 19 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest -------------------------------------------------------------------------------- /src/workshop/core/pipelines/training_pipeline.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json 2 | type: pipeline 3 | display_name: Training_pipeline 4 | experiment_name: Training_pipeline 5 | compute: azureml:cpu-cluster 6 | 7 | jobs: 8 | prep_job: 9 | type: command 10 | code: ../data_engineering 11 | command: >- 12 | python feature_engineering.py 13 | --input_folder ${{inputs.input_folder}} 14 | --prep_data ${{outputs.prep_data}} 15 | --run_mode ${{inputs.run_mode}} 16 | inputs: 17 | input_folder: 18 | type: uri_folder 19 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/ 20 | run_mode: "remote" 21 | outputs: 22 | prep_data: 23 | type: uri_folder 24 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/ 25 | mode: rw_mount 26 | environment: 27 | conda_file: ../data_engineering/conda_feature_engineering.yml 28 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest 29 | description: Feature Engineering 30 | 31 | train_job: 32 | type: command 33 | code: ../training 34 | command: >- 35 | python ml_training.py 36 | --prep_data ${{inputs.prep_data}} 37 | --model_folder ${{outputs.model_folder}} 38 | --run_mode ${{inputs.run_mode}} 39 | inputs: 40 | prep_data: ${{parent.jobs.prep_job.outputs.prep_data}} 41 | run_mode: "remote" 42 | outputs: 43 | model_folder: 44 | type: uri_folder 45 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/ 46 | mode: rw_mount 47 | environment: 48 | conda_file: ../training/conda_ml_training.yml 49 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest 50 | description: ML Training 51 | 52 | evaluate_job: 53 | type: command 54 | code: ../evaluating 55 | command: >- 56 | python ml_evaluating.py 57 | --run_mode ${{inputs.run_mode}} 58 | --model_name ${{inputs.model_name}} 59 | --prep_data ${{inputs.prep_data}} 60 | --model_folder 
${{inputs.model_folder}} 61 | inputs: 62 | run_mode: "remote" 63 | model_name: "nyc_fare_prediction" 64 | prep_data: ${{parent.jobs.prep_job.outputs.prep_data}} 65 | model_folder: ${{parent.jobs.train_job.outputs.model_folder}} 66 | 67 | environment: 68 | conda_file: ../evaluating/conda_ml_evaluating.yml 69 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest 70 | description: model-evaluation 71 | 72 | -------------------------------------------------------------------------------- /src/workshop/core/scoring/batch_scoring/batch_score.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import tempfile 4 | import logging 5 | from azureml.core.model import Model 6 | import pickle 7 | import pandas as pd 8 | from azureml.core import Run 9 | import os 10 | import mlflow 11 | import argparse,os,datetime 12 | 13 | def init(): 14 | global model,predictions_data_folder 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--predictions_data_folder", type=str) 17 | parser.add_argument("--model_name",default='nyc_fare_prediction',type=str, help="Name of the model in workspace") 18 | args, unknown = parser.parse_known_args() 19 | predictions_data_folder = args.predictions_data_folder 20 | print("predictions_data_folder",predictions_data_folder) 21 | current_run = Run.get_context() 22 | ws = current_run.experiment.workspace 23 | model = Model(ws,args.model_name) 24 | model.download(exist_ok=True) 25 | model = mlflow.sklearn.load_model(args.model_name) 26 | 27 | def run(mini_batch): 28 | 29 | 30 | print(f'run method start: {__file__}, run({mini_batch})') 31 | i =0 32 | for file in mini_batch: 33 | # prepare each image 34 | data = pd.read_parquet(file) 35 | print("data shape ", data.shape) 36 | predictions = model.predict(data) 37 | data["prediction"] =predictions 38 | today = datetime.datetime.today() 39 | year = today.year 40 | month = today.month 41 | day = today.day 42 | folder = "{:02d}-{:02d}-{:4d}".format(month,day,year) 43 | os.makedirs(predictions_data_folder+"/"+folder, exist_ok=True) 44 | data.to_csv(predictions_data_folder+"/"+folder+"/prediction.csv") 45 | i+=1 46 | 47 | 48 | return [1]*i 49 | -------------------------------------------------------------------------------- /src/workshop/core/scoring/batch_scoring/conda.yml: -------------------------------------------------------------------------------- 1 | name: workshop-online-scoring 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8.12 6 | - pip=21.3.1 7 | - pip: 8 | - azureml-mlflow==1.38.0 9 | - azureml-defaults==1.38.0 10 | - pandas==1.3.5 11 | - scikit-learn==1.0.2 -------------------------------------------------------------------------------- /src/workshop/core/scoring/batch_scoring/data_engineering.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from datetime import datetime 4 | import argparse 5 | import os 6 | 7 | import argparse,os 8 | import pandas as pd 9 | import datetime 10 | # data engineering 11 | 12 | # read arguments 13 | 14 | def parse_args(): 15 | # setup arg parser 16 | parser = argparse.ArgumentParser() 17 | 18 | 19 | # add arguments 20 | parser.add_argument("--nyc_file_name", type=str, default="green_taxi.parquet") 21 | parser.add_argument("--public_holiday_file_name", type=str, default="holidays.parquet") 22 | parser.add_argument("--weather_file_name", type=str, default="weather.parquet") 23 | parser.add_argument('--input_folder', 
type=str) 24 | parser.add_argument('--output_folder', type=str) 25 | 26 | # parse args 27 | args = parser.parse_args() 28 | 29 | # return args 30 | return args 31 | 32 | 33 | def build_time_features(vector): 34 | pickup_datetime = vector[0] 35 | month_num = pickup_datetime.month 36 | day_of_month = pickup_datetime.day 37 | day_of_week = pickup_datetime.weekday() 38 | hour_of_day = pickup_datetime.hour 39 | country_code = "US" 40 | hr_sin = np.sin(hour_of_day*(2.*np.pi/24)) 41 | hr_cos = np.cos(hour_of_day*(2.*np.pi/24)) 42 | dy_sin = np.sin(day_of_week*(2.*np.pi/7)) 43 | dy_cos = np.cos(day_of_week*(2.*np.pi/7)) 44 | 45 | return pd.Series((month_num, day_of_month, day_of_week, hour_of_day, country_code, hr_sin, hr_cos, dy_sin, dy_cos)) 46 | def engineer_features(green_taxi_df,holidays_df,weather_df ): 47 | 48 | green_taxi_df[["month_num", "day_of_month","day_of_week", "hour_of_day", "country_code", "hr_sin", "hr_cos", "dy_sin", "dy_cos"]] = \ 49 | green_taxi_df[["lpepPickupDatetime"]].apply(build_time_features, axis=1) 50 | 51 | columns_to_remove = ["lpepDropoffDatetime", "puLocationId", "doLocationId", "extra", "mtaTax", 52 | "improvementSurcharge", "tollsAmount", "ehailFee", "tripType", "rateCodeID", 53 | "storeAndFwdFlag", "paymentType", "fareAmount", "tipAmount"] 54 | 55 | green_taxi_df.drop(columns_to_remove, axis=1, inplace=True) 56 | 57 | 58 | green_taxi_df["datetime"] = green_taxi_df["lpepPickupDatetime"].dt.normalize() 59 | 60 | 61 | holidays_df = holidays_df.rename(columns={"countryRegionCode": "country_code"}) 62 | holidays_df["datetime"] = holidays_df["date"].dt.normalize() 63 | 64 | holidays_df.drop(["countryOrRegion", "holidayName", "date"], axis=1, inplace=True) 65 | 66 | taxi_holidays_df = pd.merge(green_taxi_df, holidays_df, how="left", on=["datetime", "country_code"]) 67 | taxi_holidays_df[taxi_holidays_df["normalizeHolidayName"].notnull()] 68 | 69 | 70 | 71 | weather_df["datetime"] = weather_df["datetime"].dt.normalize() 72 | 73 | # group by datetime 74 | aggregations = {"precipTime": "max", "temperature": "mean", "precipDepth": "max"} 75 | weather_df_grouped = weather_df.groupby("datetime").agg(aggregations) 76 | 77 | taxi_holidays_weather_df = pd.merge(taxi_holidays_df, weather_df_grouped, how="left", on=["datetime"]) 78 | 79 | final_df = taxi_holidays_weather_df.query("pickupLatitude>=40.53 and pickupLatitude<=40.88 and \ 80 | pickupLongitude>=-74.09 and pickupLongitude<=-73.72 and \ 81 | tripDistance>0 and tripDistance<75 and \ 82 | passengerCount>0 and passengerCount<100") 83 | return final_df 84 | 85 | def main(args): 86 | 87 | # read in data 88 | today = datetime.datetime.today() 89 | year = today.year 90 | month = today.month 91 | day = today.day 92 | folder = "{:02d}-{:02d}-{:4d}".format(month,day,year) 93 | green_taxi_df = pd.read_parquet(os.path.join(args.input_folder,folder, args.nyc_file_name)) 94 | 95 | 96 | holidays_df = pd.read_parquet(os.path.join(args.input_folder,folder, args.public_holiday_file_name)) 97 | 98 | weather_df = pd.read_parquet(os.path.join(args.input_folder,folder,args.weather_file_name)) 99 | 100 | final_df = engineer_features(green_taxi_df, holidays_df, weather_df) 101 | # if os.path.exists(args.output_folder): 102 | # os.remove(args.output_folder) 103 | 104 | final_df.to_parquet(args.output_folder+"/data.parquet") 105 | print("done writing data") 106 | ml_table_content = """ 107 | paths: 108 | - pattern: ./*.parquet 109 | """ 110 | with open(os.path.join(args.output_folder,"MLTable"),'w') as mltable_file: 111 | 
mltable_file.writelines(ml_table_content) 112 | 113 | 114 | 115 | # run script 116 | if __name__ == "__main__": 117 | # parse args 118 | args = parse_args() 119 | 120 | # run main function 121 | main(args) 122 | -------------------------------------------------------------------------------- /src/workshop/core/scoring/conda.yml: -------------------------------------------------------------------------------- 1 | name: workshop-online-scoring 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8.12 6 | - pip=21.3.1 7 | - pip: 8 | - azureml-mlflow==1.38.0 9 | - azureml-defaults==1.38.0 10 | - pandas==1.3.5 11 | - scikit-learn==1.0.2 -------------------------------------------------------------------------------- /src/workshop/core/scoring/deployment.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json 2 | name: green 3 | endpoint_name: mlops-workshop-endpoint #setup replace `mlops-workshop-endpoint` with your own endpoint name defined in endpoint.yml 4 | model: azureml:nyc_fare_prediction:1 5 | code_configuration: 6 | code: ./ 7 | scoring_script: score.py 8 | environment: 9 | conda_file: ./conda.yml 10 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1 11 | instance_type: Standard_DS2_V2 12 | instance_count: 1 13 | -------------------------------------------------------------------------------- /src/workshop/core/scoring/endpoint.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json 2 | name: mlops-workshop-endpoint #setup replace `mlops-workshop-endpoint` with your own endpoint name. 
It has to be globally unique 3 | auth_mode: key 4 | -------------------------------------------------------------------------------- /src/workshop/core/scoring/score.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import pandas as pd 4 | import os 5 | import mlflow 6 | # Called when the service is loaded 7 | def init(): 8 | global model 9 | # Get the path to the deployed model folder and load it 10 | model_dir = os.getenv('AZUREML_MODEL_DIR') 11 | model_file = os.listdir(model_dir)[0] 12 | model_path = os.path.join(model_dir, model_file) 13 | model = mlflow.sklearn.load_model(model_path) 14 | # Called when a request is received 15 | def run(raw_data): 16 | try: 17 | # Get the input data 18 | data = pd.DataFrame(json.loads(raw_data)['data']) 19 | # Get a prediction from the model 20 | predictions = model.predict(data) 21 | return json.dumps(predictions.tolist()) 22 | except Exception as e: 23 | error = str(e) 24 | return json.dumps(error) -------------------------------------------------------------------------------- /src/workshop/core/scoring/scoring_test_request.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "vendorID": {"715": "2", "3633": "2"}, 4 | "lpepPickupDatetime": {"715": "2016-01-04 20:48:38", "3633": "2016-02-15 20:35:58"}, 5 | "passengerCount": {"715": "1", "3633": "1"}, 6 | "tripDistance": {"715": "1.14", "3633": "6.43"}, 7 | "pickupLongitude": {"715": "-73.97727966308594", "3633": "-73.95679473876953"}, 8 | "pickupLatitude": {"715": "40.68115234375", "3633": "40.74812316894531"}, 9 | "dropoffLongitude": {"715": "-73.96723175048828", "3633": "-73.9059066772461"}, 10 | "dropoffLatitude": {"715": "40.67363739013672", "3633": "40.76784896850586"}, 11 | "month_num": {"715": "1", "3633": "2"}, 12 | "day_of_month": {"715": "4", "3633": "15"}, 13 | "day_of_week": {"715": "0", "3633": "0"}, 14 | "hour_of_day": {"715": "20", "3633": "20"}, 15 | "country_code": {"715": "US", "3633": "US"}, 16 | "hr_sin": {"715": "-0.866025403784439", "3633": "-0.866025403784439"}, 17 | "hr_cos": {"715": "0.4999999999999992", "3633": "0.4999999999999992"}, 18 | "dy_sin": {"715": "0.0", "3633": "0.0"}, 19 | "dy_cos": {"715": "1.0", "3633": "1.0"}, 20 | "datetime": {"715": "2016-01-04 00:00:00", "3633": "2016-02-15 00:00:00"}, 21 | "normalizeHolidayName": {"715": "nan", "3633": "Washington's Birthday"}, 22 | "isPaidTimeOff": {"715": "nan", "3633": "True"}, 23 | "precipTime": {"715": "1.0", "3633": "24.0"}, 24 | "temperature": {"715": "0.12389380530973423", "3633": "-6.222602739726026"}, 25 | "precipDepth": {"715": "0.0", "3633": "9999.0"} 26 | } 27 | } -------------------------------------------------------------------------------- /src/workshop/core/training/conda_ml_training.yml: -------------------------------------------------------------------------------- 1 | name: ml-training 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - pip=21.3.1 7 | - pip: 8 | - azureml-sdk==1.38.0 9 | - azureml-mlflow==1.38.0 10 | - pandas==1.3.5 11 | - scikit-learn==1.0.2 -------------------------------------------------------------------------------- /src/workshop/core/training/ml_training.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | import argparse 5 | import mlflow 6 | import mlflow.sklearn 7 |
from azureml.core import Run, Dataset, Datastore, Workspace 8 | from sklearn.linear_model import LinearRegression 9 | from sklearn.ensemble import RandomForestRegressor 10 | from sklearn.linear_model import Ridge 11 | from sklearn.model_selection import train_test_split 12 | from sklearn.pipeline import Pipeline 13 | from sklearn.preprocessing import OneHotEncoder 14 | from sklearn.impute import SimpleImputer 15 | from sklearn.compose import ColumnTransformer 16 | from sklearn.metrics import r2_score, mean_absolute_percentage_error, mean_squared_error 17 | import joblib 18 | def parse_args(): 19 | # arg parser 20 | parser = argparse.ArgumentParser() 21 | 22 | parser.add_argument("--prep_data", default="data", type=str, help="Path to prepped data, default to local folder") 23 | parser.add_argument("--model_folder", type=str, default="data", help="Path of model output folder, default to local folder") 24 | parser.add_argument("--input_file_name", type=str, default="final_df.parquet") 25 | parser.add_argument("--run_mode", type=str, default="local") 26 | 27 | 28 | # parse args 29 | args = parser.parse_args() 30 | 31 | # return args 32 | return args 33 | 34 | 35 | def createClassModel(algo_name, catg, nums): 36 | numeric_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='constant', fill_value=0))]) 37 | 38 | categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='constant', fill_value="MISSING")), ('onehot', OneHotEncoder(handle_unknown='ignore'))]) 39 | 40 | preprocesser = ColumnTransformer(transformers=[('num', numeric_transformer, nums), ('cat', categorical_transformer, catg)]) 41 | 42 | if algo_name == 'linear_regression': 43 | #--------------------------------------------- 44 | #setup: Update alpha value 45 | #--------------------------------------------- 46 | model = Ridge(alpha=100000) #setup 47 | elif algo_name == 'random_forest': 48 | model = RandomForestRegressor() 49 | else: 50 | raise ValueError(f"Unknown algo_name '{algo_name}'; expected 'linear_regression' or 'random_forest'") 51 | 52 | ModelPipeline = Pipeline(steps=[('preprocessor', preprocesser), ("model", model)]) 53 | 54 | return ModelPipeline 55 | 56 | def main(args): 57 | 58 | # read in data 59 | final_df = pd.read_parquet(os.path.join(args.prep_data, args.input_file_name)) 60 | catg_cols = ["vendorID", "month_num", "day_of_month", "normalizeHolidayName", "isPaidTimeOff"] 61 | num_cols = ["passengerCount", "tripDistance", "precipTime", "temperature", "precipDepth", "hr_sin", "hr_cos", "dy_sin", "dy_cos"] 62 | label = ["totalAmount"] 63 | # make sure categorical columns are strings 64 | final_df[catg_cols] = final_df[catg_cols].astype("str") 65 | 66 | # split data 67 | X_train, X_test, y_train, y_test = train_test_split(final_df.drop(label, axis=1), final_df[label], test_size=0.2, random_state=222) 68 | 69 | # train and evaluate the chosen algorithm 70 | os.makedirs(args.model_folder, exist_ok=True) 71 | 72 | algorithmname = "linear_regression" 73 | fitPipeline = createClassModel(algorithmname, catg_cols, num_cols) # get pipeline 74 | fitPipeline.fit(X_train, y_train.values.ravel()) # fit pipeline 75 | 76 | y_pred = fitPipeline.predict(X_test) # score with fitted pipeline 77 | 78 | # Evaluate 79 | r2 = r2_score(y_test, y_pred) 80 | mape = mean_absolute_percentage_error(y_test, y_pred) 81 | rmse = np.sqrt(mean_squared_error(y_test, y_pred)) 82 | 83 | 84 | joblib.dump(fitPipeline, args.model_folder+"/"+algorithmname+".joblib")
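# --- Editor's note (illustrative comments, not part of the original module) ---
# Because the joblib artifact above bundles the ColumnTransformer preprocessing
# together with the model, it can be reloaded and scored without re-declaring
# any transformers, e.g. (hypothetical path and dataframe):
#   pipe = joblib.load("data/linear_regression.joblib")
#   preds = pipe.predict(new_df)  # new_df must contain the raw catg/num columns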
Metrics:") 87 | print(f"R2_{algorithmname}", r2) 88 | print(f"MAPE_{algorithmname}", mape) 89 | print(f"RMSE_{algorithmname}", rmse) 90 | print("Model",args.model_folder+"/"+algorithmname+".joblib","saved!") 91 | 92 | if args.run_mode == 'remote': 93 | mlflow.log_metric(f"R2_{algorithmname}", r2) 94 | mlflow.log_metric(f"MAPE_{algorithmname}", mape) 95 | mlflow.log_metric(f"RMSE_{algorithmname}", rmse) 96 | mlflow.sklearn.log_model(fitPipeline,f"{algorithmname}_model") 97 | 98 | # run script 99 | if __name__ == "__main__": 100 | # parse args 101 | args = parse_args() 102 | # run main function 103 | main(args) -------------------------------------------------------------------------------- /src/workshop/core/training/ml_training.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json 2 | code: ./ 3 | command: >- 4 | python ml_training.py 5 | --prep_data ${{inputs.prep_data}} 6 | --model_folder ${{outputs.model_folder}} 7 | --run_mode ${{inputs.run_mode}} 8 | 9 | inputs: 10 | prep_data: 11 | type: uri_folder 12 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/ 13 | run_mode: "remote" 14 | 15 | outputs: 16 | model_folder: 17 | type: uri_folder 18 | path: azureml://datastores/workspaceblobstore/paths/mlops_workshop_data/ 19 | 20 | environment: 21 | conda_file: ./conda_ml_training.yml 22 | image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest 23 | compute: azureml:cpu-cluster 24 | display_name: ml-training 25 | experiment_name: ml-training 26 | description: ml-training 27 | -------------------------------------------------------------------------------- /src/workshop/data/linear_regression.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/data/linear_regression.joblib -------------------------------------------------------------------------------- /src/workshop/documents/EZMLOps_introduction.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/EZMLOps_introduction.pptx -------------------------------------------------------------------------------- /src/workshop/documents/IaC/createSP.azcli: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "This script will help you to create Azure Resources for EZ-MLOps workshop." 4 | echo "For your information following Azure resources will be create in a Resource Group level" 5 | echo "" 6 | echo "* Service Principal" 7 | echo "" 8 | 9 | read -p "0. Please hit Enter to run the script >>" 10 | 11 | # Select Azure subscription 12 | az account list --output table 13 | 14 | echo "" 15 | 16 | read -p "1. TYPE your subscription Name for this workshop case-sensitive>>" subName 17 | 18 | if [ ! -z "$subName" ]; then 19 | echo "You select " $subName " for the workshop." 20 | az account set --subscription "$subName" --verbose 21 | subscriptionID=$(az account show --query id -o tsv) 22 | echo $subscriptionID 23 | else 24 | echo "Please run the script again!! 
EXIT" 25 | exit 26 | fi 27 | 28 | chkName=$(az account list --output tsv --query "[?isDefault].name") 29 | 30 | if [ "$subName" = "$chkName" ]; then 31 | echo "" 32 | echo "Subscripion Name has confirmed" 33 | echo "" 34 | else 35 | echo "Please try again with correct subscription name" 36 | echo "EXIT" 37 | exit 38 | fi 39 | 40 | echo "" 41 | 42 | az account list-locations --output table --query []['name','displayName'] 43 | echo "" 44 | echo "2. Type location for the lab" 45 | read -p "Location >>" loc 46 | 47 | # set azure region 48 | if [ ! -z "$loc" ]; then 49 | echo "You set location" $loc " for the lab." 50 | else 51 | echo "Default location is East US 2" 52 | loc=eastus2 53 | fi 54 | 55 | echo "" 56 | echo "3. What is your Resource Group Name" 57 | read -p "Resource Group Name >>" rgName 58 | 59 | # set azure region 60 | if [ ! -z "$rgName" ]; then 61 | echo "You set Resource Group Name" $rgName " for the lab." 62 | else 63 | echo "Please try again with correct Resource Group name" 64 | echo "EXIT" 65 | exit 66 | fi 67 | 68 | chkrgName=$(az group list --output tsv --query "[?name=='$rgName)'].name" ) 69 | 70 | if [ "$rgName" = "$chkrgName" ]; then 71 | echo "" 72 | echo "Resource Group Name has confirmed" 73 | echo "" 74 | else 75 | echo "Please try again with correct subscription name" 76 | echo "EXIT" 77 | exit 78 | fi 79 | 80 | 81 | # Create Service Principal 82 | # https://docs.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli 83 | 84 | let "randomIdentifier=$RANDOM*$RANDOM" 85 | servicePrincipalName="ezmlops-$randomIdentifier" 86 | roleName="contributor" 87 | # Verify the ID of the active subscription 88 | echo "Using subscription ID $subscriptionID" 89 | echo "" 90 | echo "Creating SP for RBAC with name $servicePrincipalName," 91 | echo "" 92 | echo "with role $roleName" 93 | echo "" 94 | echo "and in scopes /subscriptions/$subscriptionID/resourceGroups/$rgName" 95 | echo "" 96 | echo "If you fail this step, you cannot move on to the next step" 97 | echo "" 98 | az ad sp create-for-rbac --name $servicePrincipalName --role $roleName --scopes /subscriptions/$subscriptionID/resourceGroups/$rgName > sp.txt 99 | echo "" 100 | echo "*************************************" 101 | echo "Information about the Service Principal is captured in the file ./sp.txt" 102 | echo "*************************************" 103 | echo "" 104 | cat ./sp.txt 105 | echo "" 106 | -------------------------------------------------------------------------------- /src/workshop/documents/IaC/iac_EZ_MLOps.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "name": { 6 | "type": "string", 7 | "minLength": 5, 8 | "maxLength": 8, 9 | "metadata": { 10 | "description": "Specifies the name of the deployment." 
11 | } 12 | }, 13 | "vmSize": { 14 | "type": "string", 15 | "allowedValues": [ 16 | "Standard_DS2_v2", 17 | "Standard_DS3", 18 | "Standard_DS3_v2", 19 | "Standard_DS4", 20 | "Standard_DS4_v2" 21 | ], 22 | "defaultValue": "Standard_DS3_v2", 23 | "metadata": { 24 | "description": "Choose VM size for computes" 25 | } 26 | }, 27 | "location": { 28 | "type": "string", 29 | "allowedValues": [ 30 | "centralus", 31 | "eastus", 32 | "eastus2", 33 | "southcentralus", 34 | "westcentralus", 35 | "westus" 36 | ], 37 | "defaultValue": "eastus2", 38 | "metadata": { 39 | "description": "Specifies the location of the Azure Machine Learning workspace and dependent resources." 40 | } 41 | } 42 | }, 43 | "variables": { 44 | "tenantId": "[subscription().tenantId]", 45 | "storageAccountName": "[concat(parameters('name'),'store')]", 46 | "keyVaultName": "[concat(parameters('name'),'akv')]", 47 | "applicationInsightsName": "[concat(parameters('name'),'appi')]", 48 | "containerRegistryName": "[concat(parameters('name'),'acr')]", 49 | "workspaceName": "[concat(parameters('name'),'aml')]", 50 | "storageAccount": "[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]", 51 | "keyVault": "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]", 52 | "applicationInsights": "[resourceId('Microsoft.Insights/components', variables('applicationInsightsName'))]", 53 | "containerRegistry": "[resourceId('Microsoft.ContainerRegistry/registries', variables('containerRegistryName'))]", 54 | "amlciName": "[concat(parameters('name'), 'i', substring(uniqueString(resourceGroup().id),1,3))]", 55 | "amlccName": "[concat(parameters('name'), 'c', substring(uniqueString(resourceGroup().id),1,3))]" 56 | }, 57 | "resources": [ 58 | { 59 | "type": "Microsoft.Storage/storageAccounts", 60 | "apiVersion": "2021-01-01", 61 | "name": "[variables('storageAccountName')]", 62 | "location": "[parameters('location')]", 63 | "sku": { 64 | "name": "Standard_LRS" 65 | }, 66 | "kind": "StorageV2", 67 | "properties": { 68 | "encryption": { 69 | "services": { 70 | "blob": { 71 | "enabled": true 72 | }, 73 | "file": { 74 | "enabled": true 75 | } 76 | }, 77 | "keySource": "Microsoft.Storage" 78 | }, 79 | "supportsHttpsTrafficOnly": true 80 | } 81 | }, 82 | { 83 | "type": "Microsoft.KeyVault/vaults", 84 | "apiVersion": "2021-04-01-preview", 85 | "name": "[variables('keyVaultName')]", 86 | "location": "[parameters('location')]", 87 | "properties": { 88 | "tenantId": "[variables('tenantId')]", 89 | "sku": { 90 | "name": "standard", 91 | "family": "A" 92 | }, 93 | "accessPolicies": [], 94 | "enableSoftDelete": true 95 | } 96 | }, 97 | { 98 | "type": "Microsoft.Insights/components", 99 | "apiVersion": "2020-02-02", 100 | "name": "[variables('applicationInsightsName')]", 101 | "location": "[if(or(equals(parameters('location'),'eastus2'), equals(parameters('location'),'westcentralus')),'southcentralus',parameters('location'))]", 102 | "kind": "web", 103 | "properties": { 104 | "Application_Type": "web" 105 | } 106 | }, 107 | { 108 | "type": "Microsoft.ContainerRegistry/registries", 109 | "sku": { 110 | "name": "Standard", 111 | "tier": "Standard" 112 | }, 113 | "name": "[variables('containerRegistryName')]", 114 | "apiVersion": "2019-12-01-preview", 115 | "location": "[parameters('location')]", 116 | "properties": { 117 | "adminUserEnabled": true 118 | } 119 | }, 120 | { 121 | "type": "Microsoft.MachineLearningServices/workspaces", 122 | "apiVersion": "2020-03-01", 123 | "identity": { 124 | "type": "systemAssigned" 125 | }, 
126 | "name": "[variables('workspaceName')]", 127 | "location": "[parameters('location')]", 128 | "dependsOn": [ 129 | "[variables('storageAccount')]", 130 | "[variables('keyVault')]", 131 | "[variables('applicationInsights')]", 132 | "[variables('containerRegistry')]" 133 | ], 134 | "properties": { 135 | "friendlyName": "[variables('workspaceName')]", 136 | "storageAccount": "[variables('storageAccount')]", 137 | "keyVault": "[variables('keyVault')]", 138 | "applicationInsights": "[variables('applicationInsights')]", 139 | "containerRegistry": "[variables('containerRegistry')]" 140 | } 141 | }, 142 | { 143 | "type": "Microsoft.MachineLearningServices/workspaces/computes", 144 | "name": "[concat(variables('workspaceName'), '/', variables('amlciName'))]", 145 | "apiVersion": "2021-07-01", 146 | "location": "[parameters('location')]", 147 | "dependsOn": [ 148 | "[resourceId('Microsoft.MachineLearningServices/workspaces', variables('workspaceName'))]" 149 | ], 150 | "properties": { 151 | "computeType": "ComputeInstance", 152 | "properties": { 153 | "vmSize": "[parameters('vmSize')]" 154 | } 155 | } 156 | }, 157 | { 158 | "type": "Microsoft.MachineLearningServices/workspaces/computes", 159 | "name": "[concat(variables('workspaceName'), '/', variables('amlccName'))]", 160 | "apiVersion": "2021-01-01", 161 | "location": "[parameters('location')]", 162 | "dependsOn": [ 163 | "[resourceId('Microsoft.MachineLearningServices/workspaces', variables('workspaceName'))]" 164 | ], 165 | "properties": { 166 | "computeType": "AmlCompute", 167 | "properties": { 168 | "vmSize": "[parameters('vmSize')]", 169 | "scaleSettings": { 170 | "minNodeCount": "0", 171 | "maxNodeCount": "1" 172 | } 173 | } 174 | } 175 | } 176 | ] 177 | } 178 | -------------------------------------------------------------------------------- /src/workshop/documents/IaC/iac_cc.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/amlCompute.schema.json 2 | name: amlcc 3 | type: amlcompute 4 | size: STANDARD_DS3_v2 5 | min_instances: 0 6 | max_instances: 2 7 | idle_time_before_scale_down: 120 -------------------------------------------------------------------------------- /src/workshop/documents/IaC/iac_ci.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/computeInstance.schema.json 2 | name: amlci 3 | type: computeinstance 4 | size: STANDARD_DS3_v2
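Editor's note: the two compute YAML files above are applied with `az ml compute create -f <file>`. A hedged Python-SDK sketch of the same compute definitions follows (illustrative only; it assumes the `azure-ai-ml` v2 package, and the subscription/resource group/workspace strings are placeholders):

```python
# Illustrative sketch: create the same compute targets via the Python SDK v2
from azure.ai.ml import MLClient
from azure.ai.ml.entities import AmlCompute, ComputeInstance
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<subscription-id>",      # placeholder
    resource_group_name="<resource-group>",   # placeholder
    workspace_name="<workspace-name>",        # placeholder
)

# mirrors iac_cc.yml: autoscaling cluster that scales to zero when idle
cluster = AmlCompute(name="amlcc", size="STANDARD_DS3_v2",
                     min_instances=0, max_instances=2,
                     idle_time_before_scale_down=120)
ml_client.compute.begin_create_or_update(cluster).result()

# mirrors iac_ci.yml: single-user compute instance
instance = ComputeInstance(name="amlci", size="STANDARD_DS3_v2")
ml_client.compute.begin_create_or_update(instance).result()
```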
-z "$subName" ]; then 27 | echo "You select " $subName " for the workshop." 28 | az account set --subscription "$subName" --verbose 29 | subscriptionID=$(az account show --query id -o tsv) 30 | echo $subscriptionID 31 | else 32 | echo "Please run the script again!! EXIT" 33 | exit 34 | fi 35 | 36 | chkName=$(az account list --output tsv --query "[?isDefault].name") 37 | 38 | if [ "$subName" = "$chkName" ]; then 39 | echo "Subscripion Name has confirmed" 40 | else 41 | echo "Please try again with correct subscription name" 42 | exit 43 | fi 44 | 45 | echo "" 46 | 47 | az account list-locations --output table --query []['name','displayName'] 48 | echo "" 49 | echo "2. Type location for the lab" 50 | read -p "Location >>" loc 51 | 52 | # set azure region 53 | if [ ! -z "$loc" ]; then 54 | echo "You set location" $loc " for the lab." 55 | else 56 | echo "Default location is West US 2" 57 | loc=westus2 58 | fi 59 | 60 | # if you have exsiting one please use the one 61 | num=$(shuf -i0-1000 -n1) 62 | rgName=amlwrkshp-$num-rg #Save it as ps1 63 | amlName=amlwrkshp-$num 64 | ciName=amlci$num 65 | echo $rgName 66 | 67 | echo "Creating Resource Group" 68 | # Create Resource Group 69 | az group create -n $rgName -l $loc --tags 'owner=workshop' 'environment=workshop' 'deleteme=afterworkshop' 70 | 71 | echo "Creating Azure Machine Learning Service" 72 | # Create aml workspace 73 | az ml workspace create -g $rgName -n $amlName 74 | 75 | echo "Creating Compute Instance in your $amlName Azure Machine Learning Workspace" 76 | # Create Compute Instance 77 | az ml compute create --name amlci$num --size STANDARD_DS11_V2 --type ComputeInstance --resource-group $rgName --workspace-name $amlName 78 | 79 | echo "Creating Compute Cluster in your $amlName Azure Machine Learning Workspace" 80 | # Create Comput Cluster 81 | az ml compute create --name amlcc$num --size STANDARD_DS11_V2 --min-instances 0 --max-instances 2 --type AmlCompute --resource-group $rgName --workspace-name $amlName 82 | 83 | # Create Service Principal 84 | # https://docs.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli 85 | 86 | let "randomIdentifier=$RANDOM*$RANDOM" 87 | servicePrincipalName="mlops-sp-$randomIdentifier" 88 | roleName="contributor" 89 | # Verify the ID of the active subscription 90 | echo "Using subscription ID $subscriptionID" 91 | echo "" 92 | echo "Creating SP for RBAC with name $servicePrincipalName," 93 | echo "with role $roleName" 94 | echo "and in scopes /subscriptions/$subscriptionID/resourceGroups/$resourceGroup" 95 | echo "" 96 | az ad sp create-for-rbac --name $servicePrincipalName --role $roleName --scopes /subscriptions/$subscriptionID/resourceGroups/$rgName > sp.txt 97 | echo "" 98 | echo "*************************************" 99 | echo "Information about the Service Principal is captured in the file ./sp.txt" 100 | echo "*************************************" 101 | echo "" 102 | cat ./sp.txt 103 | echo "" 104 | -------------------------------------------------------------------------------- /src/workshop/documents/images/arm000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/arm000.png -------------------------------------------------------------------------------- /src/workshop/documents/images/arm001.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/arm001.png -------------------------------------------------------------------------------- /src/workshop/documents/images/arm002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/arm002.png -------------------------------------------------------------------------------- /src/workshop/documents/images/arm100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/arm100.png -------------------------------------------------------------------------------- /src/workshop/documents/images/cicd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/cicd.png -------------------------------------------------------------------------------- /src/workshop/documents/images/cloudshell-accept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/cloudshell-accept.png -------------------------------------------------------------------------------- /src/workshop/documents/images/cloudshell-firstlaunch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/cloudshell-firstlaunch.png -------------------------------------------------------------------------------- /src/workshop/documents/images/cloudshell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/cloudshell.png -------------------------------------------------------------------------------- /src/workshop/documents/images/cloudshell2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/cloudshell2.png -------------------------------------------------------------------------------- /src/workshop/documents/images/github4000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/github4000.png -------------------------------------------------------------------------------- /src/workshop/documents/images/github4001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/github4001.png -------------------------------------------------------------------------------- /src/workshop/documents/images/github4002.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/github4002.png -------------------------------------------------------------------------------- /src/workshop/documents/images/github4003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/github4003.png -------------------------------------------------------------------------------- /src/workshop/documents/images/github4004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/github4004.png -------------------------------------------------------------------------------- /src/workshop/documents/images/github4005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/github4005.png -------------------------------------------------------------------------------- /src/workshop/documents/images/github4006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/github4006.png -------------------------------------------------------------------------------- /src/workshop/documents/images/github4007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/github4007.png -------------------------------------------------------------------------------- /src/workshop/documents/images/github4008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/github4008.png -------------------------------------------------------------------------------- /src/workshop/documents/images/monolithic_modular.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/monolithic_modular.png -------------------------------------------------------------------------------- /src/workshop/documents/images/part3cicd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/part3cicd.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli000.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli001.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli001.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli002.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli003.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli004.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli005.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli006.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli007.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli008.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli009.png -------------------------------------------------------------------------------- /src/workshop/documents/images/run_mlopsworkshop_azcli010.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/run_mlopsworkshop_azcli010.png -------------------------------------------------------------------------------- /src/workshop/documents/images/training_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/training_pipeline.png -------------------------------------------------------------------------------- /src/workshop/documents/images/video_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/MLOpsTemplate/c9b4013e2395786c3a1dc683c7dc17e519f29741/src/workshop/documents/images/video_img.png -------------------------------------------------------------------------------- /src/workshop/documents/part_1.md: -------------------------------------------------------------------------------- 1 | 2 | # Part 1: Structure code for fast iterative development 3 | ## Pre-requisites 4 | - Complete [Part 0](part_0.md) to set up the Azure ML workspace. Ensure the following: 5 | - Your conda environment ``mlops-workshop-local`` is activated. 6 | - You completed the step to run [create_datasets.py](part_0.md#option-a-use-compute-instance-for-code-development). 7 | 8 | ## Summary 9 | Your team has been working on a new ML problem. The team has been performing exploratory work on the data and algorithms and has reached a point where the solution direction is solidified. Now it is time to put structure into the work so that the team can iterate faster toward building a fully functional solution. 10 | 11 | So far, team members have been working mostly on Jupyter notebooks on their personal compute (Azure CI & PC). As the first step in MLOps, your team needs to accomplish the following: 12 | 13 | - Modularization: the monolithic notebook is refactored into Python modules that can be developed and tested independently and in parallel by multiple members 14 | - Parameterization: the modules are parameterized so that they can be rerun with different parameter values. 15 | 16 | To illustrate how the process works, the notebook was refactored into a feature engineering module, an ML training module and an ML evaluating module, and you will run these modules individually in the local development environment to see how they work. 17 | 18 | ![monolithic to modular](./images/monolithic_modular.png) 19 | 20 | ## Steps 21 | 22 | > Note: You can run the following tasks on a Compute Instance in your Azure Machine Learning workspace. You can use __Jupyter__ or __VSCode__. 23 | 24 | 1. Familiarize yourself with the steps in this [jupyter 25 | notebook](../notebooks/taxi-tutorial.ipynb). This showcases the overall data engineering and model building 26 | process. **There is no need to run this as part of this workshop.** 27 | > Note: If you do want to run this notebook, it is recommended to run it in a virtual environment using the conda dependencies specified in this file: `MLOpsTemplate/src/workshop/conda-local.yml`. Additionally, if you run the notebook from a Compute Instance, you can first configure your conda environment with these dependencies, and then leverage the ability to add new kernels referenced [here](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-terminal#add-new-kernels) to run your notebook. 28 | 29 |
2. Discuss with your team: why is a monolithic code structure a challenge to a scalable and repeatable ML development process? 30 | > Note: Now observe how the monolithic notebook was refactored into a feature/data engineering module, an ML training module and a model validation module so that they can be developed and run independently. 31 | 32 | 3. Go to the workshop folder. 33 | > Action Items: Run the following code snippet. 34 | ```bash 35 | cd src/workshop 36 | ``` 37 | > Note: Review the ```workshop/data``` folder. There are data files that were created by the data generation process. The same data files were also sent to the Azure Machine Learning Studio's default datastore under ```workspaceblobstore/mlops_workshop/data```. 38 | 4. Create your own development branch where you can make and track changes. This branch will be your development area to create and test new code or pipelines before committing or merging the code into a common branch, such as ```integration```. 39 | 40 | - Run the following command to create a new branch named "yourname-dev" 41 | ```bash 42 | git checkout -b yourname-dev 43 | ``` 44 | - This will set the working branch to ```yourname-dev```. To check, run the following command: 45 | ```bash 46 | git branch 47 | ``` 48 | 5. Review the refactored engineering logic from the notebook in the ```feature_engineering.py``` module under the ```data_engineering``` folder. 49 | - The module performs the following: 50 | - Accepts the following parameters: 51 | - ```input_folder```: path to a folder for input data. The value for local test run is ```data``` 52 | - ```prep_data```: path to a folder for output data. The value for local test run is ```data``` 53 | - ```public_holiday_file_name```: name of the public holiday file. The value for local test run is ```holidays.parquet``` 54 | - ```weather_file_name```: name of the weather raw file. The value for local test run is ```weather.parquet``` 55 | - ```nyc_file_name```: name of the New York taxi raw file. The value for local test run is ```green_taxi.parquet``` 56 | - Performs the data transformation, data merging and feature engineering logic (the hour and day-of-week features use a cyclical sine/cosine encoding; see the short example after this step) 57 | - Splits the data into train and test sets where test_size is 20% 58 | - Writes the output data files to the output folder 59 | > Action Item: Run the following code snippet. 60 | ```bash 61 | python core/data_engineering/feature_engineering.py \ 62 | --input_folder data \ 63 | --prep_data data \ 64 | --public_holiday_file_name holidays.parquet \ 65 | --weather_file_name weather.parquet \ 66 | --nyc_file_name green_taxi.parquet 67 | ```
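For intuition on the time features: the module encodes hour-of-day and day-of-week with a cyclical sine/cosine transform so that, for example, hour 23 and hour 0 land next to each other in feature space rather than 23 units apart. A minimal, self-contained sketch of the same transform (illustrative only):

```python
import numpy as np
import pandas as pd

hours = pd.Series([0, 6, 12, 23])
encoded = pd.DataFrame({
    "hour": hours,
    "hr_sin": np.sin(hours * (2.0 * np.pi / 24)),
    "hr_cos": np.cos(hours * (2.0 * np.pi / 24)),
})
print(encoded)  # hour 23 maps onto the unit circle adjacent to hour 0
```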
68 | 6. Review the refactored ML training logic in the ```ml_training.py``` module under the ```training``` folder. 69 | - The module performs the following: 70 | - Accepts the following parameters: 71 | - ```prep_data```: path to a folder for input data. The value for local test run is ```data``` 72 | - ```input_file_name```: name of the input train data file. The value for local test run is ```final_df.parquet``` 73 | - ```model_folder```: path to an output folder to save the trained model. The value for local test run is ```data``` 74 | - Splits the input train data into train and validation datasets and performs training 75 | - Prints out MAPE, R2 and RMSE metrics 76 | - Writes the trained model file to the output folder 77 | > Action Item: Run the following code snippet. 78 | ```bash 79 | python core/training/ml_training.py \ 80 | --prep_data data \ 81 | --input_file_name final_df.parquet \ 82 | --model_folder data 83 | ``` 84 | 7. Review the refactored ML evaluation logic in the ```ml_evaluating.py``` module under the ```evaluating``` folder. 85 | - The module performs the following: 86 | - Accepts the following parameters: 87 | - ```prep_data```: path to a folder for test input data. The value for local test run is ```data```. 88 | - ```input_file_name```: name of the input test data file. The value for local test run is ```test_df.parquet```. 89 | - ```model_folder```: path to a model folder. The value for local test run is ```data``` 90 | - Loads the model 91 | - Scores the model on the input test data and prints out MAPE, R2 and RMSE metrics 92 | > Action Item: Run the following code snippet. 93 | ```bash 94 | python core/evaluating/ml_evaluating.py \ 95 | --prep_data data \ 96 | --input_file_name test_df.parquet 97 | ``` 98 | 99 | ## Success criteria 100 | - Feature engineering module: 101 | - Data is processed correctly and output to a folder as final_df.parquet and test_df.parquet files, ready for ML training 102 | - ML training module 103 | - Performs ML training and prints out MAPE, R2 and RMSE metrics from the input datasets 104 | - Produces the model at the output location 105 | - ML evaluating module 106 | - Loads the trained model, scores it on an input test dataset and prints out MAPE, R2 and RMSE metrics 107 | 108 | ## [Go to Part 2](part_2.md) 109 | -------------------------------------------------------------------------------- /src/workshop/documents/part_2.md: -------------------------------------------------------------------------------- 1 | 2 | # Part 2: Use cloud scale compute to run, deploy and manage ML experiments with Azure ML 3 | 4 | ## Pre-requisites 5 | - Complete [Part 0](part_0.md), [Part 1](part_1.md) 6 | - Run each module (feature_engineering, ml_training and ml_evaluating) successfully in local mode 7 | - Have an Azure ML workspace set up with a Compute Cluster named ```cpu-cluster``` 8 | 9 | ## Summary 10 | After successfully restructuring the Jupyter notebook and running the modules locally, your team wants to leverage the Azure cloud to run the experiment at scale. 11 | They also want to take advantage of experiment tracking and model management capabilities in Azure ML to keep track of experiments. 12 | Finally, the team wants to deploy the model as a REST endpoint for real-time inferencing and explore the option of deploying it for batch inferencing. 13 | To accomplish these goals, you will perform the following: 14 | - Run the feature_engineering module as a job in Azure ML 15 | - Run the ml_training module as a job in Azure ML and observe the experiment metrics 16 | - Run the ml_evaluating module as a job in Azure ML and observe how the model can be registered to the Azure ML model repository 17 | - Run the three modules together as a pipeline 18 | - Deploy and test the produced ML model as an API using an Azure Managed Online Endpoint 19 | 20 | 21 | ## Steps 22 | 1. Go to the workshop folder. 23 | > Action Item: Run the following code snippet. 24 | ```bash 25 | cd src/workshop 26 | ``` 27 | 2. Set default values to configure your resource group and workspace. 28 | > Action Item: Run the following code snippet. 29 | ```bash 30 | az configure --defaults group=YOUR_RESOURCE_GROUP workspace=YOUR_WORKSPACE 31 | ``` 32 | 33 | 3. Run the ```feature_engineering.py``` module under the ```data_engineering``` folder by following the steps below: 34 | > Action Items: 35 | > - Run the following code snippet: 36 | ```bash 37 | az ml job create -f core/data_engineering/feature_engineering.yml 38 | ``` 39 | > - Go to Azure ML Studio and locate the run detail for this experiment. (If you prefer Python over the CLI, see the SDK sketch below.) 40 |
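If you prefer Python over the CLI for this step, the same YAML job definition can be loaded and submitted with the `azure-ai-ml` v2 SDK. This is an illustrative sketch, not part of the workshop repo; it assumes the SDK is installed and a workspace `config.json` is available:

```python
# Illustrative: submit the YAML-defined job from Python and stream its logs
from azure.ai.ml import MLClient, load_job
from azure.identity import DefaultAzureCredential

ml_client = MLClient.from_config(credential=DefaultAzureCredential())
job = load_job("core/data_engineering/feature_engineering.yml")
submitted = ml_client.jobs.create_or_update(job)
ml_client.jobs.stream(submitted.name)  # blocks until the job finishes
```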
41 | 4. Run the ```ml_training.py``` module under the ```training``` folder by following the steps below: 42 | > Action Items: 43 | > - Run the following code snippet: 44 | ```bash 45 | az ml job create -f core/training/ml_training.yml 46 | ``` 47 | > - Go to Azure ML Studio and locate the run detail for this experiment. 48 | 49 | 5. Run the ```ml_evaluating.py``` module under the ```evaluating``` folder by following the steps below: 50 | > Action Items: 51 | > - Run the following code snippet: 52 | 53 | ```bash 54 | az ml job create -f core/evaluating/ml_evaluating.yml 55 | ``` 56 | > - Go to Azure ML Studio and locate the run detail for this experiment. Observe the ML metrics and how the model was logged to Azure ML's model registry. 57 | 58 | 6. Create a pipeline that runs the feature_engineering, training and evaluation in one workflow. 59 | > Action Items: Run the pipeline by running the following code snippet. 60 | 61 | ```bash 62 | az ml job create -f core/pipelines/training_pipeline.yml 63 | ``` 64 | > - Go to the run detail in Azure ML Studio and observe the relationship graph among the modules. (See the chart below as well.) 65 | 66 | 7. Discuss this question: Why should we run the modules both individually and together in a pipeline? 67 | 68 | 8. Deploy to an Azure ML Managed Online Endpoint by following the steps below: 69 | > Action Items: 70 | > - Update the ```endpoint.yml``` and ```deployment.yml``` files with the name of your endpoint (it should be a globally unique name) 71 | > - Create your endpoint 72 | ```bash 73 | az ml online-endpoint create --file core/scoring/endpoint.yml 74 | ``` 75 | > - Create a green deployment 76 | ```bash 77 | az ml online-deployment create --file core/scoring/deployment.yml 78 | ``` 79 | > - Test the deployed service with mock-up data from scoring_test_request.json 80 | ```bash 81 | az ml online-endpoint invoke -n YOUR_ENDPOINT_NAME --deployment green --request-file core/scoring/scoring_test_request.json 82 | ``` 83 | > - Observe the returned scores from the endpoint evaluation. (A Python alternative to the CLI invoke is sketched below.)
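Besides `az ml online-endpoint invoke`, the deployed endpoint is a plain HTTPS service and can be called with any REST client. A hedged sketch follows (the scoring URI and key are placeholders; take the real values from the endpoint's "Consume" tab in Azure ML Studio):

```python
# Illustrative: call the managed online endpoint directly over HTTPS
import json
import requests

scoring_uri = "https://<your-endpoint>.<region>.inference.ml.azure.com/score"  # placeholder
api_key = "<endpoint-key>"  # placeholder

with open("core/scoring/scoring_test_request.json") as f:
    payload = json.load(f)  # same {"data": {...}} shape that score.py expects

headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
resp = requests.post(scoring_uri, json=payload, headers=headers)
print(resp.status_code, resp.json())
```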
84 | 85 | ### The entire training pipeline is illustrated with this diagram 86 | ![training_pipeline](images/training_pipeline.png) 87 | 88 | ## Success criteria 89 | - Run the modules individually in Azure 90 | - Capture metrics and models in the ml_training and ml_evaluating modules 91 | - Run the three modules together in a pipeline 92 | - Model is deployed successfully to the managed endpoint 93 | - Testing is successful 94 | 95 | ## Reference materials 96 | - [Azure ML CLI v2 tutorial](https://docs.microsoft.com/en-us/learn/paths/train-models-azure-machine-learning-cli-v2/) 97 | - [Azure ML CLI single job examples](https://github.com/Azure/azureml-examples/tree/main/cli/jobs/single-step) 98 | - [Azure ML CLI pipeline examples](https://github.com/Azure/azureml-examples/tree/main/cli/jobs/pipelines) 99 | - [Deploy to managed online endpoint](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-managed-online-endpoints) 100 | - [Deploy to batch endpoint](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-batch-endpoint) 101 | 102 | ## [Go to Part 3](part_3.md) 103 | -------------------------------------------------------------------------------- /src/workshop/documents/part_3.md: -------------------------------------------------------------------------------- 1 | 2 | # Part 3: Use GitHub for Version Control and Automation 3 | 4 | ## Pre-requisites 5 | - Complete [Part 0](part_0.md), [Part 1](part_1.md), [Part 2](part_2.md) 6 | 7 | ## Summary 8 | Your team wants to learn how to automate and orchestrate common tasks such as environment setup, training, and testing using GitHub Actions. To accomplish this, the following steps will be performed: 9 | - Set up centralized version control to keep track of project code and manage different feature development tracks and releases 10 | - Learn how to automate and orchestrate those common tasks by setting up a unit test workflow to run when code is updated in your branch 11 | 12 | ## Steps 13 | 1. Move to the dev branch you created in Part 1 if you are not already there. 14 | 15 | > Action Items: 16 | > - Navigate to the repo if not already there by running ```cd PATH_TO_REPO``` with the proper path to the cloned location. 17 | > - Run the following command to check out your "yourname-dev" branch 18 | 19 | ```bash 20 | git checkout yourname-dev 21 | ``` 22 | 23 | 2. Create an automated unit test task that will be triggered by pushing the code to your development/feature branch. Let's use the ```feature_engineering``` module as the automated unit test to make sure the module performs correctly. 24 | 25 | > Action Items: Update the `workshop_unit_test.yml` file with your secret credentials, and replace the resource group, workspace and location with your specific details. 26 | > - Locate the file named `workshop_unit_test.yml` in the `.github/workflows` folder 27 | > - Make the following updates to the file: 28 | > - Update the secret name by replacing ```AZURE_SERVICE_PRINCIPAL``` to match the GitHub secret name for your Service Principal that was created in Part 0. (If you followed the naming convention in Part 0, there is no need to update this, as your secret name should be ```AZURE_SERVICE_PRINCIPAL```.) 29 | > - Update `GROUP`, `NAME`, and `LOCATION` with the specific names of your resource group, workspace, and location created in Part 0. 30 |
31 | 3. Next, review the contents of the ```workshop_unit_test.yml``` file to understand the steps and how the workflow is triggered. 32 | 33 | - Review the trigger defined in the `on:` section to see how this workflow is run automatically 34 | - The `workflow_dispatch` allows the workflow to be run manually, which can be useful when testing. 35 | - The remaining lines define what automatically triggers the workflow. It is triggered on a push to any branch that is not `main` or `integration`. The changes in the push are also filtered to only include changes made to the `feature_engineering` module. 36 | - Review the job starting at the `jobs:` section that has been created already and does the following steps: 37 | - Checks out the repo 38 | - Logs into Azure 39 | - Creates an AML job to run the feature engineering module using the [custom action](../../../.github/actions/aml-job-create/action.yaml) and the existing [feature engineering job file](../core/data_engineering/feature_engineering.yml) 40 | 41 | 4. Now that the necessary changes have been made, the changes can be pushed to your feature branch, which will trigger the feature_engineering_unit_test workflow. 42 | 43 | > Action Items: 44 | > - Run the following commands in sequence to stage changes, commit them, and then push them to your repo: 45 | 1. ```bash 46 | git status 47 | ``` 48 | 2. ```bash 49 | git add . 50 | ``` 51 | 3. ```bash 52 | git commit -am "configurations update" 53 | ``` 54 | 4. ```bash 55 | git push origin yourname-dev 56 | ``` 57 | > Note: `git status` shows the files that have been modified. It is useful for seeing the latest status of the files, but isn't necessary to commit changes. 58 | 59 | > - Check to see if the workflow was properly triggered by going to your GitHub repo and selecting the Actions tab. 60 | 61 | ## The CI/CD workflow is shown below: 62 | ![pipeline](images/part3cicd.png) 63 | 64 | ## Success criteria 65 | - A feature or development branch was created to track your changes 66 | - A trigger was created on the workflow file ```workshop_unit_test.yml``` to run on a push to your feature branch 67 | - Understand the additional updates that were made to the ```feature_engineering.yml``` file for it to use your secrets and AML resources 68 | - The workflow was successfully triggered by pushing changes to your feature branch 69 | 70 | ## Reference materials 71 | - [GitHub Actions](https://github.com/features/actions) 72 | - [GitHub Actions Workflow Triggers](https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows) 73 | 74 | 75 | ## [Go to Part 4](part_4.md) 76 | -------------------------------------------------------------------------------- /src/workshop/documents/part_tips.md: -------------------------------------------------------------------------------- 1 | # Pre-Workshop Checklist 2 | > Note: Review the following criteria to ensure you can complete the workshop. These are critical pieces of access to get right for a successful workshop experience. 3 | 4 | ## Azure 5 | 1. Do you have an Azure account? 6 | 7 | 2. Do you have a `Contributor` role for your Azure Subscription? 8 | - If you don't, do you have a `Contributor` role for the Azure Resource Group? 9 | > Note: If you don't, you can't run the workshop. 10 | 11 | 3. Do you have a Service Principal? 12 | - If you don't own one, do you know a Service Principal and its information (client ID, secret)? 13 | - If you don't, can you ask your Cloud team to create a Service Principal scoped to a single resource group? 14 | > Note: If you don't, you can't run the workshop. 15 | 16 | 4. Do you know who can help you handle issues? 17 | 18 | 5. Do you know a person from your Cloud infra/security team who can help you: 19 | - Create Azure resources 20 | - Grant permissions 21 | 22 | 6. Did you register 'Microsoft.MachineLearningServices' for your Azure subscription?
23 | > Note: If you're not sure, go to the Azure Portal > Subscriptions > 'YourSubscription' > Resource providers > Search 'Microsoft.MachineLearningServices' 24 | 25 | ![ml_services](./images/arm100.png) 26 | 27 | ## GitHub 28 | 1. Do you have a GitHub account? 29 | > Note: If not, create a new account and follow the instructions in Part 0 of the workshop. 30 | 31 | # [Go to Part 0](./part_0.md) 32 | -------------------------------------------------------------------------------- /src/workshop/infra/conda.yml: -------------------------------------------------------------------------------- 1 | name: workshop-online-scoring 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8.12 6 | - pip=21.3.1 7 | - pip: 8 | - azureml-mlflow==1.38.0 9 | - azureml-defaults==1.38.0 10 | - pandas 11 | - scikit-learn==1.0.2 -------------------------------------------------------------------------------- /src/workshop/requirements-local.txt: -------------------------------------------------------------------------------- 1 | azureml-sdk==1.38.0 2 | azureml-mlflow==1.38.0 3 | azureml-opendatasets==1.38.0 4 | pandas==1.3.5 5 | scikit-learn==1.0.2 6 | importlib-metadata<3,>=0.12 7 | msrest==0.6.21 --------------------------------------------------------------------------------
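Editor's note: to smoke-test the three refactored modules locally in one go (Part 1), a small convenience script like the following can help. It is illustrative only and not part of the original repo; it assumes it is run from `src/workshop` with the local conda environment active, and it uses the same flags as the Part 1 snippets:

```python
# run_local_smoke_test.py -- hypothetical helper, not in the original repo
import subprocess

steps = [
    ["python", "core/data_engineering/feature_engineering.py",
     "--input_folder", "data", "--prep_data", "data",
     "--public_holiday_file_name", "holidays.parquet",
     "--weather_file_name", "weather.parquet",
     "--nyc_file_name", "green_taxi.parquet"],
    ["python", "core/training/ml_training.py",
     "--prep_data", "data", "--input_file_name", "final_df.parquet",
     "--model_folder", "data"],
    ["python", "core/evaluating/ml_evaluating.py",
     "--prep_data", "data", "--input_file_name", "test_df.parquet"],
]

for cmd in steps:
    print(">>>", " ".join(cmd))
    subprocess.run(cmd, check=True)  # stop at the first failing module
```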