├── .funcignore
├── proxies.json
├── requirements.txt
├── host.json
├── MLflowWebHookTransition
│   ├── function.json
│   └── __init__.py
├── az-devops-setup
│   ├── azure-pipelines.yaml
│   └── ExecuteDevOpsTrigger.py
├── .gitignore
└── README.md

/.funcignore:
--------------------------------------------------------------------------------
.git*
.vscode
local.settings.json
test
.venv
--------------------------------------------------------------------------------
/proxies.json:
--------------------------------------------------------------------------------
{
    "$schema": "http://json.schemastore.org/proxies",
    "proxies": {}
}
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
# DO NOT include azure-functions-worker in this file
# The Python Worker is managed by Azure Functions platform
# Manually managing azure-functions-worker may cause unexpected issues

azure-functions
azure-devops
--------------------------------------------------------------------------------
/host.json:
--------------------------------------------------------------------------------
{
  "version": "2.0",
  "logging": {
    "applicationInsights": {
      "samplingSettings": {
        "isEnabled": true,
        "excludedTypes": "Request"
      }
    }
  },
  "extensionBundle": {
    "id": "Microsoft.Azure.Functions.ExtensionBundle",
    "version": "[1.*, 2.0.0)"
  }
}
--------------------------------------------------------------------------------
/MLflowWebHookTransition/function.json:
--------------------------------------------------------------------------------
{
  "scriptFile": "__init__.py",
  "bindings": [
    {
      "authLevel": "anonymous",
      "type": "httpTrigger",
      "direction": "in",
      "name": "req",
      "methods": [
        "post"
      ]
    },
    {
      "type": "http",
      "direction": "out",
      "name": "$return"
    }
  ]
}
--------------------------------------------------------------------------------
/az-devops-setup/azure-pipelines.yaml:
--------------------------------------------------------------------------------
# Starter pipeline
# Start with a minimal pipeline that you can customize to build and deploy your code.
# Add steps that build, run tests, deploy, and more:
# https://aka.ms/yaml

# this pipeline requires 3 variables to be configured on it:
#   databricks_host  - Databricks workspace host
#   databricks_token - Databricks personal access token
#   db_job_id        - ID of the Databricks job to trigger
# Non-secret pipeline variables are exposed to the script steps below as
# upper-cased environment variables (DATABRICKS_HOST, DATABRICKS_TOKEN,
# DB_JOB_ID), which the Databricks CLI picks up; the model_name, version,
# stage, timestamp, text and webhook_id variables are passed at queue time
# by the Azure Function.

trigger:
- master

pool:
  vmImage: 'ubuntu-20.04'

steps:
- script: |
    pip install databricks-cli
    ~/.local/bin/databricks -h
  displayName: 'Install Databricks CLI'

- script: |
    ~/.local/bin/databricks jobs run-now --job-id $DB_JOB_ID --notebook-params "{\"model_name\": \"${MODEL_NAME}\", \"version\": \"${VERSION}\", \"stage\": \"${STAGE}\", \"timestamp\": \"${TIMESTAMP}\", \"text\": \"${TEXT}\", \"webhook_id\": \"${WEBHOOK_ID}\"}"
  displayName: 'Trigger JOB'
--------------------------------------------------------------------------------
/az-devops-setup/ExecuteDevOpsTrigger.py:
--------------------------------------------------------------------------------
# Databricks notebook source
dbutils.widgets.text(name = "model_name", defaultValue = "unknown model", label = "Model Name")
dbutils.widgets.text(name = "version", defaultValue = "-1", label = "Version")
dbutils.widgets.text(name = "stage", defaultValue = "Unknown", label = "To Stage")
dbutils.widgets.text(name = "timestamp", defaultValue = "0", label = "Timestamp")
dbutils.widgets.text(name = "text", defaultValue = "", label = "Text")
dbutils.widgets.text(name = "webhook_id", defaultValue = "", label = "Webhook ID")

# COMMAND ----------

# collect the parameters passed by the CI/CD pipeline into a single record
params = {
    'model_name': dbutils.widgets.get("model_name"),
    'version': dbutils.widgets.get("version"),
    'stage': dbutils.widgets.get("stage"),
    'timestamp': dbutils.widgets.get("timestamp"),
    'text': dbutils.widgets.get("text"),
    'webhook_id': dbutils.widgets.get("webhook_id")
}

# COMMAND ----------

from pyspark.sql import Row
import pyspark.sql.functions as F
df = spark.createDataFrame(Row(params)).withColumn("run_ts", F.current_timestamp())

# COMMAND ----------

# for demo purposes the job just appends the received payload to a Delta table;
# a real job would run integration tests, deployment steps, etc.
df.write.format("delta").mode("append").option("mergeSchema", "true").save("/tmp/alexey.ott/mlflow-runs/")

# COMMAND ----------

#display(spark.read.format("delta").load("/tmp/alexey.ott/mlflow-runs/"))
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Azure Functions artifacts
bin
obj
appsettings.json
local.settings.json
.python_packages
/.vscode/
*~
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
This repository contains an example of integrating MLflow with Azure DevOps via [MLflow Model Registry webhooks](https://docs.microsoft.com/en-us/azure/databricks/applications/mlflow/model-registry-webhooks?cid=kerryherger), using an Azure Function as an intermediate layer to work around the webhooks' lack of support for authentication headers, custom payloads, etc.

## Create a Databricks job

This part is just an example, as it is easier to trigger a Databricks job directly via a webhook.

Create a job that will be triggered from the CI/CD pipeline. This job accepts a number of parameters, such as the model name, version, etc.
You can use [az-devops-setup/ExecuteDevOpsTrigger.py](az-devops-setup/ExecuteDevOpsTrigger.py) as a base for it.

## Create a CI/CD pipeline

Create a new build pipeline using the pipeline definition in the [az-devops-setup/azure-pipelines.yaml](az-devops-setup/azure-pipelines.yaml) file. This pipeline triggers the job on Databricks - the Databricks host, token, and job ID are configured via pipeline variables. You need the ID of the created pipeline to put into the Azure Function definition.
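One possible way to look up that ID is with the `azure-devops` Python package (already listed in `requirements.txt`); this is only a sketch - the organization URL, project name, and token placeholder below mirror the placeholders used in `MLflowWebHookTransition/__init__.py`:

```python
from msrest.authentication import BasicAuthentication
from azure.devops.connection import Connection

# list the pipelines of the project together with their IDs
connection = Connection(base_url="https://dev.azure.com/xxx/",
                        creds=BasicAuthentication('', "<azure-devops-personal-access-token>"))
pipelines_client = connection.clients_v6_0.get_pipelines_client()
for pipeline in pipelines_client.list_pipelines(project="MLFlowHooks"):
    print(pipeline.id, pipeline.name)
```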
## Create Azure Function

Use the current folder to create an Azure Function named `MLflowWebHookTransition` - the easiest way to do this is to use VS Code with the Azure Functions extension.
Before activating the function, update the following variables in `MLflowWebHookTransition/__init__.py` (they are hard-coded only to keep the example simple - in a real deployment you should fetch them from Azure Key Vault, for example as [described here](https://servian.dev/accessing-azure-key-vault-from-python-functions-44d548b49b37); a minimal sketch of reading them from application settings follows the list):

* `to_staging_pipeline_id` - ID of the Azure DevOps pipeline to trigger when a model is transitioned into Staging (except when it is moved there from Production)
* `to_prod_pipeline_id` - ID of the Azure DevOps pipeline to trigger when a model is transitioned into Production
* `organization_url` - URL of the Azure DevOps organization
* `personal_access_token` - personal access token for Azure DevOps (it only needs enough permissions to read pipelines & trigger runs)
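As a minimal sketch of that approach (the setting names below are made up for illustration), the values could be read from the Function App's application settings, which can in turn be backed by Key Vault references, instead of being hard-coded:

```python
import os

# hypothetical application settings configured on the Function App
# (e.g. as Key Vault references) instead of hard-coded constants
personal_access_token = os.environ.get("AZDO_PAT", "")
organization_url = os.environ.get("AZDO_ORG_URL", "https://dev.azure.com/xxx/")
to_staging_pipeline_id = int(os.environ.get("TO_STAGING_PIPELINE_ID", "-1"))
to_prod_pipeline_id = int(os.environ.get("TO_PROD_PIPELINE_ID", "-1"))
project = os.environ.get("AZDO_PROJECT", "MLFlowHooks")
```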
## Register the webhook

Use `curl` to create the actual webhook (see the [documentation](https://docs.microsoft.com/en-us/azure/databricks/applications/mlflow/model-registry-webhooks?cid=kerryherger) for more information):

```sh
MODEL_NAME='aott-wine-model'
PAT="...."                                        # change it to your Databricks personal access token
DBHOST="https://adb-1234.10.azuredatabricks.net"  # change it to your Databricks workspace URL
AZFUNC='aott-mlflow-hook-1'                       # name of the Azure Function App
AZHOOKNAME='MLflowWebHookTransition'              # name of the function inside the app

# list existing webhooks for the model:
#curl -H "Authorization: Bearer $PAT" "$DBHOST/api/2.0/mlflow/registry-webhooks/list?model_name=$MODEL_NAME"
curl -H "Authorization: Bearer $PAT" "$DBHOST/api/2.0/mlflow/registry-webhooks/create" -X POST -d "{\"model_name\": \"$MODEL_NAME\", \"events\": [\"MODEL_VERSION_TRANSITIONED_STAGE\"], \"description\": \"Test for $MODEL_NAME\", \"http_url_spec\": { \"url\": \"https://${AZFUNC}.azurewebsites.net/api/${AZHOOKNAME}\"}}"
```
--------------------------------------------------------------------------------
/MLflowWebHookTransition/__init__.py:
--------------------------------------------------------------------------------
import logging

import azure.functions as func

from msrest.authentication import BasicAuthentication

from azure.devops.connection import Connection
from azure.devops.v6_0.pipelines.models import RunPipelineParameters, Variable


def create_error(msg):
    return func.HttpResponse(msg, status_code=400)


# placeholder values - see the README for how to configure them
personal_access_token = ''
organization_url = 'https://dev.azure.com/xxx/'
to_prod_pipeline_id = -1
to_staging_pipeline_id = 3
project = 'MLFlowHooks'


def maybe_trigger_action(req_body: dict):
    to_stage = req_body.get("to_stage", "")
    from_stage = req_body.get("from_stage", "")
    model_name = req_body["model_name"]
    model_version = req_body.get("version", "0")
    wh_id = req_body["webhook_id"]
    timestamp = req_body["event_timestamp"]
    payload_text = req_body.get("text", "")

    action = "None"

    credentials = BasicAuthentication('', personal_access_token)
    connection = Connection(base_url=organization_url, creds=credentials)
    pipeline_client = connection.clients_v6_0.get_pipelines_client()

    pipeline_id = -1

    variables = {
        'model_name': Variable(value=model_name),
        'version': Variable(value=model_version),
        'webhook_id': Variable(value=wh_id),
        'timestamp': Variable(value=timestamp),
        'text': Variable(value=payload_text),
        'stage': Variable(value=to_stage)
    }
    run_parameters = RunPipelineParameters(variables=variables)
    logging.info("Going to trigger build with parameters: %s", run_parameters)

    # a transition into Staging (unless coming from Production) runs the
    # integration-test pipeline, a transition into Production runs the release pipeline
    if to_stage == "Staging" and from_stage != "Production":
        logging.info("Going to trigger integration pipeline")
        pipeline_id = to_staging_pipeline_id
        action = "Integration test is triggered."

    if to_stage == "Production":
        logging.info("Going to trigger release pipeline")
        pipeline_id = to_prod_pipeline_id
        action = "Deployment to Azure ML is triggered."

    if pipeline_id != -1:
        # run the selected pipeline
        run_pipeline = pipeline_client.run_pipeline(
            run_parameters=run_parameters,
            project=project,
            pipeline_id=pipeline_id)

        action = action + f' Status: {run_pipeline}'

    return action


def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    if req.method != "POST":
        logging.error('Only POST requests are supported!')
        return create_error("Only POST requests are supported!")

    try:
        req_body = req.get_json()
        logging.info("Request body: %s", req_body)
    except ValueError:
        logging.error("Can't parse JSON payload")
        return create_error("Can't parse JSON payload")

    try:
        event = req_body["event"]
        model_name = req_body["model_name"]
        model_version = req_body.get("version", "0")
    except Exception:
        logging.error("Can't extract data from payload")
        return create_error("Can't extract data from payload")

    ret_str = f"Processing event: {event} for model {model_name} with version {model_version}"

    if event == "MODEL_VERSION_TRANSITIONED_STAGE":
        ret_str = ret_str + ". Action: " + maybe_trigger_action(req_body)

    logging.info(ret_str)
    return func.HttpResponse(ret_str)
--------------------------------------------------------------------------------