├── .funcignore
├── proxies.json
├── requirements.txt
├── host.json
├── MLflowWebHookTransition
│   ├── function.json
│   └── __init__.py
├── az-devops-setup
│   ├── azure-pipelines.yaml
│   └── ExecuteDevOpsTrigger.py
├── .gitignore
└── README.md

/.funcignore:
--------------------------------------------------------------------------------
.git*
.vscode
local.settings.json
test
.venv
--------------------------------------------------------------------------------
/proxies.json:
--------------------------------------------------------------------------------
{
    "$schema": "http://json.schemastore.org/proxies",
    "proxies": {}
}
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
# DO NOT include azure-functions-worker in this file
# The Python Worker is managed by Azure Functions platform
# Manually managing azure-functions-worker may cause unexpected issues

azure-functions
azure-devops
--------------------------------------------------------------------------------
/host.json:
--------------------------------------------------------------------------------
{
  "version": "2.0",
  "logging": {
    "applicationInsights": {
      "samplingSettings": {
        "isEnabled": true,
        "excludedTypes": "Request"
      }
    }
  },
  "extensionBundle": {
    "id": "Microsoft.Azure.Functions.ExtensionBundle",
    "version": "[1.*, 2.0.0)"
  }
}
--------------------------------------------------------------------------------
/MLflowWebHookTransition/function.json:
--------------------------------------------------------------------------------
{
  "scriptFile": "__init__.py",
  "bindings": [
    {
      "authLevel": "anonymous",
      "type": "httpTrigger",
      "direction": "in",
      "name": "req",
      "methods": [
        "post"
      ]
    },
    {
      "type": "http",
      "direction": "out",
      "name": "$return"
    }
  ]
}
--------------------------------------------------------------------------------
/az-devops-setup/azure-pipelines.yaml:
--------------------------------------------------------------------------------
# Starter pipeline
# Start with a minimal pipeline that you can customize to build and deploy your code.
# Add steps that build, run tests, deploy, and more:
# https://aka.ms/yaml

# this pipeline requires 3 variables to be configured on it:
#   databricks_host  - Databricks workspace host
#   databricks_token - Databricks personal access token
#   db_job_id        - ID of the Databricks job to trigger
# Non-secret pipeline variables are exposed to the script steps below as
# upper-cased environment variables (DATABRICKS_HOST, DATABRICKS_TOKEN,
# DB_JOB_ID), which the Databricks CLI picks up; the model_name, version,
# stage, timestamp, text and webhook_id variables are passed at queue time
# by the Azure Function.

trigger:
- master

pool:
  vmImage: 'ubuntu-20.04'

steps:
- script: |
    pip install databricks-cli
    ~/.local/bin/databricks -h
  displayName: 'Install Databricks CLI'

- script: |
    ~/.local/bin/databricks jobs run-now --job-id $DB_JOB_ID --notebook-params "{\"model_name\": \"${MODEL_NAME}\", \"version\": \"${VERSION}\", \"stage\": \"${STAGE}\", \"timestamp\": \"${TIMESTAMP}\", \"text\": \"${TEXT}\", \"webhook_id\": \"${WEBHOOK_ID}\"}"
  displayName: 'Trigger JOB'
--------------------------------------------------------------------------------
/az-devops-setup/ExecuteDevOpsTrigger.py:
--------------------------------------------------------------------------------
# Databricks notebook source
dbutils.widgets.text(name = "model_name", defaultValue = "unknown model", label = "Model Name")
dbutils.widgets.text(name = "version", defaultValue = "-1", label = "Version")
dbutils.widgets.text(name = "stage", defaultValue = "Unknown", label = "To Stage")
dbutils.widgets.text(name = "timestamp", defaultValue = "0", label = "Timestamp")
dbutils.widgets.text(name = "text", defaultValue = "", label = "Text")
dbutils.widgets.text(name = "webhook_id", defaultValue = "", label = "Webhook ID")

# COMMAND ----------

# collect the parameters passed by the CI/CD pipeline into a single record
params = {
    'model_name': dbutils.widgets.get("model_name"),
    'version': dbutils.widgets.get("version"),
    'stage': dbutils.widgets.get("stage"),
    'timestamp': dbutils.widgets.get("timestamp"),
    'text': dbutils.widgets.get("text"),
    'webhook_id': dbutils.widgets.get("webhook_id")
}

# COMMAND ----------

from pyspark.sql import Row
import pyspark.sql.functions as F
df = spark.createDataFrame(Row(params)).withColumn("run_ts", F.current_timestamp())

# COMMAND ----------

# for demo purposes the job just appends the received payload to a Delta table;
# a real job would run integration tests, deployment steps, etc.
df.write.format("delta").mode("append").option("mergeSchema", "true").save("/tmp/alexey.ott/mlflow-runs/")

# COMMAND ----------

#display(spark.read.format("delta").load("/tmp/alexey.ott/mlflow-runs/"))
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Azure Functions artifacts
bin
obj
appsettings.json
local.settings.json
.python_packages
/.vscode/
*~
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
This repository contains an example of integrating MLflow with Azure DevOps via [MLflow Model Registry webhooks](https://docs.microsoft.com/en-us/azure/databricks/applications/mlflow/model-registry-webhooks?cid=kerryherger), using an Azure Function as an intermediate layer to work around the webhooks' lack of support for authentication headers, custom payloads, etc.

## Create a Databricks job

This part is just an example, as it is easier to trigger a Databricks job directly via a webhook.

Create a job that will be triggered from the CI/CD pipeline. This job accepts a number of parameters, such as the model name, version, etc.
You can use [az-devops-setup/ExecuteDevOpsTrigger.py](az-devops-setup/ExecuteDevOpsTrigger.py) as a base for it.

## Create a CI/CD pipeline

Create a new build pipeline using the pipeline definition in the [az-devops-setup/azure-pipelines.yaml](az-devops-setup/azure-pipelines.yaml) file. This pipeline triggers the job on Databricks - the Databricks host, token, and job ID are configured via pipeline variables. You need the ID of the created pipeline to put into the Azure Function definition.
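One possible way to look up that ID is with the `azure-devops` Python package (already listed in `requirements.txt`); this is only a sketch - the organization URL, project name, and token placeholder below mirror the placeholders used in `MLflowWebHookTransition/__init__.py`:

```python
from msrest.authentication import BasicAuthentication
from azure.devops.connection import Connection

# list the pipelines of the project together with their IDs
connection = Connection(base_url="https://dev.azure.com/xxx/",
                        creds=BasicAuthentication('', "<azure-devops-personal-access-token>"))
pipelines_client = connection.clients_v6_0.get_pipelines_client()
for pipeline in pipelines_client.list_pipelines(project="MLFlowHooks"):
    print(pipeline.id, pipeline.name)
```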
## Create Azure Function

Use the current folder to create an Azure Function named `MLflowWebHookTransition` - the easiest way to do this is to use VS Code with the Azure Functions extension.
Before activating the function, update the following variables in `MLflowWebHookTransition/__init__.py` (they are hard-coded only to keep the example simple - in a real deployment you should fetch them from Azure Key Vault, for example as [described here](https://servian.dev/accessing-azure-key-vault-from-python-functions-44d548b49b37); a minimal sketch of reading them from application settings follows the list):

* `to_staging_pipeline_id` - ID of the Azure DevOps pipeline to trigger when a model is transitioned into Staging (except when it is moved there from Production)
* `to_prod_pipeline_id` - ID of the Azure DevOps pipeline to trigger when a model is transitioned into Production
* `organization_url` - URL of the Azure DevOps organization
* `personal_access_token` - personal access token for Azure DevOps (it only needs enough permissions to read pipelines & trigger runs)
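As a minimal sketch of that approach (the setting names below are made up for illustration), the values could be read from the Function App's application settings, which can in turn be backed by Key Vault references, instead of being hard-coded:

```python
import os

# hypothetical application settings configured on the Function App
# (e.g. as Key Vault references) instead of hard-coded constants
personal_access_token = os.environ.get("AZDO_PAT", "")
organization_url = os.environ.get("AZDO_ORG_URL", "https://dev.azure.com/xxx/")
to_staging_pipeline_id = int(os.environ.get("TO_STAGING_PIPELINE_ID", "-1"))
to_prod_pipeline_id = int(os.environ.get("TO_PROD_PIPELINE_ID", "-1"))
project = os.environ.get("AZDO_PROJECT", "MLFlowHooks")
```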
## Register the webhook

Use `curl` to create the actual webhook (see the [documentation](https://docs.microsoft.com/en-us/azure/databricks/applications/mlflow/model-registry-webhooks?cid=kerryherger) for more information):

```sh
MODEL_NAME='aott-wine-model'
PAT="...."                                        # change it to your Databricks personal access token
DBHOST="https://adb-1234.10.azuredatabricks.net"  # change it to your Databricks workspace URL
AZFUNC='aott-mlflow-hook-1'                       # name of the Azure Function App
AZHOOKNAME='MLflowWebHookTransition'              # name of the function inside the app

# list existing webhooks for the model:
#curl -H "Authorization: Bearer $PAT" "$DBHOST/api/2.0/mlflow/registry-webhooks/list?model_name=$MODEL_NAME"
curl -H "Authorization: Bearer $PAT" "$DBHOST/api/2.0/mlflow/registry-webhooks/create" -X POST -d "{\"model_name\": \"$MODEL_NAME\", \"events\": [\"MODEL_VERSION_TRANSITIONED_STAGE\"], \"description\": \"Test for $MODEL_NAME\", \"http_url_spec\": { \"url\": \"https://${AZFUNC}.azurewebsites.net/api/${AZHOOKNAME}\"}}"
```
--------------------------------------------------------------------------------
/MLflowWebHookTransition/__init__.py:
--------------------------------------------------------------------------------
import logging

import azure.functions as func

from msrest.authentication import BasicAuthentication

from azure.devops.connection import Connection
from azure.devops.v6_0.pipelines.models import RunPipelineParameters, Variable


def create_error(msg):
    return func.HttpResponse(msg, status_code=400)


# placeholder values - see the README for how to configure them
personal_access_token = ''
organization_url = 'https://dev.azure.com/xxx/'
to_prod_pipeline_id = -1
to_staging_pipeline_id = 3
project = 'MLFlowHooks'


def maybe_trigger_action(req_body: dict):
    to_stage = req_body.get("to_stage", "")
    from_stage = req_body.get("from_stage", "")
    model_name = req_body["model_name"]
    model_version = req_body.get("version", "0")
    wh_id = req_body["webhook_id"]
    timestamp = req_body["event_timestamp"]
    payload_text = req_body.get("text", "")

    action = "None"

    credentials = BasicAuthentication('', personal_access_token)
    connection = Connection(base_url=organization_url, creds=credentials)
    pipeline_client = connection.clients_v6_0.get_pipelines_client()

    pipeline_id = -1

    variables = {
        'model_name': Variable(value=model_name),
        'version': Variable(value=model_version),
        'webhook_id': Variable(value=wh_id),
        'timestamp': Variable(value=timestamp),
        'text': Variable(value=payload_text),
        'stage': Variable(value=to_stage)
    }
    run_parameters = RunPipelineParameters(variables=variables)
    logging.info("Going to trigger build with parameters: %s", run_parameters)

    # a transition into Staging (unless coming from Production) runs the
    # integration-test pipeline, a transition into Production runs the release pipeline
    if to_stage == "Staging" and from_stage != "Production":
        logging.info("Going to trigger integration pipeline")
        pipeline_id = to_staging_pipeline_id
        action = "Integration test is triggered."

    if to_stage == "Production":
        logging.info("Going to trigger release pipeline")
        pipeline_id = to_prod_pipeline_id
        action = "Deployment to Azure ML is triggered."

    if pipeline_id != -1:
        # run the selected pipeline
        run_pipeline = pipeline_client.run_pipeline(
            run_parameters=run_parameters,
            project=project,
            pipeline_id=pipeline_id)

        action = action + f' Status: {run_pipeline}'

    return action


def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    if req.method != "POST":
        logging.error('Only POST requests are supported!')
        return create_error("Only POST requests are supported!")

    try:
        req_body = req.get_json()
        logging.info("Request body: %s", req_body)
    except ValueError:
        logging.error("Can't parse JSON payload")
        return create_error("Can't parse JSON payload")

    try:
        event = req_body["event"]
        model_name = req_body["model_name"]
        model_version = req_body.get("version", "0")
    except Exception:
        logging.error("Can't extract data from payload")
        return create_error("Can't extract data from payload")

    ret_str = f"Processing event: {event} for model {model_name} with version {model_version}"

    if event == "MODEL_VERSION_TRANSITIONED_STAGE":
        ret_str = ret_str + ". Action: " + maybe_trigger_action(req_body)

    logging.info(ret_str)
    return func.HttpResponse(ret_str)
--------------------------------------------------------------------------------