"""Logging proxy for an MLflow model server.

Accepts scoring requests on ``/invocations``, forwards them unchanged to the
real MLflow model server, and writes the input rows together with the returned
predictions to a uniquely named CSV file in the log directory.

Only split-orient JSON input is handled, i.e. a payload of the form
``{"columns": [...], "data": [[...], ...]}``.
"""

import csv
import json
import logging
import os
import sys
import uuid

import click
import requests
from flask import Flask, request

logging.basicConfig(level=logging.DEBUG)

app = Flask(__name__)
data_type = "application/json"

# Both values are filled in by main() from the command line before the
# Flask app starts serving requests.
_log_dir = None
_mlflow_model_server_uri = None


def write_records(inp, out, log_dir):
    """Write one CSV file holding the request rows and their predictions.

    Args:
        inp: Split-orient request payload; ``inp["columns"]`` is the list of
            column names and ``inp["data"]`` the list of rows.
        out: Iterable of predictions, paired positionally with the rows.
        log_dir: Directory the CSV file is written to (created if missing).

    The file is named with a fresh UUID. Its header is ``prediction``
    followed by the input columns, and every data row starts with the
    prediction for that row.
    """
    os.makedirs(log_dir, exist_ok=True)
    header = ["prediction", *inp["columns"]]
    opath = os.path.join(log_dir, f"{uuid.uuid4()}.csv")
    # newline="" plus an explicit "\n" terminator keeps the one-record-per-line
    # layout while letting the csv module quote values containing commas,
    # quotes or newlines.
    with open(opath, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(header)
        for pred, row in zip(out, inp["data"]):
            # Build a new list instead of inserting into `row`, so the
            # caller's payload is left untouched. str() keeps the textual
            # form of each value (e.g. None is logged as "None").
            writer.writerow([str(value) for value in (pred, *row)])


def call_mlflow_model_server(data):
    """POST ``data`` as JSON to the configured model server and return the parsed reply.

    Raises:
        requests.HTTPError: If the model server answers with a 4xx/5xx status.
            Failing here prevents an error payload from being logged as if it
            were a list of predictions.
    """
    headers = {"accept": data_type, "Content-Type": data_type}
    rsp = requests.post(
        url=_mlflow_model_server_uri,
        data=json.dumps(data),
        allow_redirects=True,
        headers=headers,
    )
    rsp.raise_for_status()
    return json.loads(rsp.text)


@app.route("/invocations", methods=["POST"])
def process():
    """Forward the scoring request, log input and output, and return the predictions."""
    inp = request.json
    out = call_mlflow_model_server(inp)
    write_records(inp, out, _log_dir)
    # Declare the JSON content type explicitly; a bare string return would be
    # served with Flask's default HTML mimetype.
    return app.response_class(json.dumps(out), mimetype=data_type)


@click.command()
@click.option("--port", help="Port", type=int, required=True)
@click.option("--mlflow-model-server-uri", help="MLflow model server URI", type=str, required=True)
@click.option("--log-dir", help="Log directory", default="tmp", type=str)
def main(port, mlflow_model_server_uri, log_dir):
    """Store the command-line settings in module globals and start the proxy."""
    # locals() is read before any other local is bound, so it holds exactly
    # the command-line options.
    print("Options:")
    for k, v in locals().items():
        print(f" {k}: {v}")
    global _log_dir, _mlflow_model_server_uri
    _mlflow_model_server_uri = mlflow_model_server_uri
    _log_dir = log_dir
    # NOTE(review): debug=True turns on Flask's debugger/reloader; confirm
    # this is only ever run for local experimentation.
    app.run(debug=True, port=port)


if __name__ == '__main__':
    main()
Only `split-orient` JSON input is currently supported. 9 | * With custom [PythonModel](https://www.mlflow.org/docs/latest/models.html#custom-python-models) that logs the data. 10 | 11 | The data is currently logged to local disk. Next TODO is to log it to cloud storage (S3). 12 | 13 | ## Setup 14 | 15 | ``` 16 | conda env create --file conda.yaml 17 | conda activate mlflow-model-monitoring 18 | ``` 19 | ## Train and register two models 20 | 21 | ``` 22 | python custom_model_train.py 23 | ``` 24 | 25 | This will create two registered models: `sklearn-monitor` and `sklearn-monitor-custom`. 26 | 27 | ## Proxy Server 28 | 29 | The proxy server forwards the request to the actual model server, and then logs the input and output data as a CSV file. 30 | 31 | 32 | 33 | Source code: [proxy_server.py](proxy_server.py). 34 | 35 | Start model server. 36 | ``` 37 | mlflow models serve --port 5002 --model-uri models:/sklearn-monitor/production 38 | ``` 39 | 40 | Start proxy server. 41 | ``` 42 | python proxy_server.py --port 5001 --mlflow-model-server-uri http://localhost:5002/invocations --log-dir out 43 | ``` 44 | 45 | ## Custom PythonModel 46 | 47 | 48 | 49 | Source code: [custom_model_train.py](custom_model_train.py). 50 | 51 | Start model server. 
52 | ``` 53 | export MLFLOW_MONITORING_DIR=tmp 54 | mlflow models serve --port 5001 --model-uri models:/sklearn-monitor-custom/production 55 | ``` 56 | 57 | ## Predictions 58 | 59 | ``` 60 | curl -X POST \ 61 | -H "accept: application/json" \ 62 | -H "Content-Type:application/json" \ 63 | -d '{ "columns": [ "alcohol", "chlorides", "citric acid", "density", "fixed acidity", 64 | "free sulfur dioxide", "pH", "residual sugar", "sulphates", 65 | "total sulfur dioxide", "volatile acidity" ], 66 | "data": [ 67 | [ 7, 0.27, 0.36, 20.7, 0.045, 45, 170, 1.001, 3, 0.45, 8.8 ], 68 | [ 6.3, 0.3, 0.34, 1.6, 0.049, 14, 132, 0.994, 3.3, 0.49, 9.5 ] ] }' \ 69 | http://localhost:5001/invocations 70 | ``` 71 | 72 | ## Logging directory 73 | 74 | Each request for scoring will generate a CSV file containing the input data and the prediction. 75 | The following example shows data for three requests. 76 | The CSV file is written to the local directory specified in MLFLOW_MONITORING_DIR. 77 | 78 | Note that writing to anything except the local filesystem (e.g. S3) is not viable. 79 | Specifying configuration and credentials is not practical with the custom model. 80 | Also, you cannot write to Spark or Delta tables since the Spark context is not available inside the custom model. 
81 | 82 | ``` 83 | ls -l out 84 | 85 | 74b9c823-fd32-493e-8cdd-45834369c506.csv 86 | 8d55b5e6-7693-49c0-8f69-2cc2ae0b2f6b.csv 87 | e0d90bb3-b528-43ef-82d9-2ca14fcac266.csv 88 | ``` 89 | 90 | ``` 91 | cat 74b9c823-fd32-493e-8cdd-45834369c506.csv 92 | 93 | prediction,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol 94 | 6.0,7,0.27,0.36,20.7,0.045,45,170,1.001,3,0.45,8.8 95 | 6.0,6.3,0.3,0.34,1.6,0.049,14,132,0.994,3.3,0.49,9.5 96 | 5.0,8.1,0.28,0.4,6.9,0.05,30,97,0.9951,3.26,0.44,10.1 97 | ``` 98 | 99 | -------------------------------------------------------------------------------- /custom_model_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | import pandas as pd 4 | import numpy as np 5 | import yaml 6 | import click 7 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 8 | from sklearn.model_selection import train_test_split 9 | from sklearn.tree import DecisionTreeRegressor 10 | import mlflow 11 | import mlflow.sklearn 12 | 13 | print("MLflow Version:", mlflow.__version__) 14 | print("MLflow Tracking URI:", mlflow.get_tracking_uri()) 15 | client = mlflow.tracking.MlflowClient() 16 | 17 | class CustomModel(mlflow.pyfunc.PythonModel): 18 | def __init__(self, model): 19 | self.model = model 20 | def predict(self, context, data): 21 | predictions = self.model.predict(data) 22 | data.insert(0, "prediction", predictions.tolist()) 23 | out_dir = os.environ.get("MLFLOW_MONITORING_DIR","out") 24 | os.makedirs(out_dir, exist_ok=True) 25 | path = os.path.join(out_dir,str(uuid.uuid4())+".csv") 26 | with open(path, "w") as f: 27 | data.to_csv(f, index=False) 28 | return predictions 29 | 30 | def build_data(data_path): 31 | col_label = "quality" 32 | data = pd.read_csv(data_path) 33 | train, test = train_test_split(data, test_size=0.30, random_state=2019) 34 | X_train = 
train.drop([col_label], axis=1) 35 | X_test = test.drop([col_label], axis=1) 36 | y_train = train[[col_label]] 37 | y_test = test[[col_label]] 38 | return X_train, X_test, y_train, y_test 39 | 40 | def train(data_path, max_depth, max_leaf_nodes): 41 | X_train, X_test, y_train, y_test = build_data(data_path) 42 | with mlflow.start_run() as run: 43 | run_id = run.info.run_uuid 44 | experiment_id = run.info.experiment_id 45 | print("MLflow:") 46 | print(" run_id:", run_id) 47 | print(" experiment_id:", experiment_id) 48 | print(" experiment_name:", client.get_experiment(experiment_id).name) 49 | 50 | # Create model 51 | dt = DecisionTreeRegressor(max_depth=max_depth, max_leaf_nodes=max_leaf_nodes) 52 | print("Model:\n ", dt) 53 | 54 | # Fit and predict 55 | dt.fit(X_train, y_train) 56 | predictions = dt.predict(X_test) 57 | 58 | # MLflow params 59 | mlflow.log_param("max_depth", max_depth) 60 | mlflow.log_param("max_leaf_nodes", max_leaf_nodes) 61 | 62 | # MLflow metrics 63 | mlflow.log_metric("rmse", np.sqrt(mean_squared_error(y_test, predictions))) 64 | mlflow.log_metric("r2", r2_score(y_test, predictions)) 65 | mlflow.log_metric("mae", mean_absolute_error(y_test, predictions)) 66 | 67 | # MLflow tags 68 | mlflow.set_tag("data_path", data_path) 69 | mlflow.set_tag("mlflow_version", mlflow.__version__) 70 | 71 | # Pipeline 72 | from sklearn.pipeline import Pipeline 73 | pipeline = Pipeline([('step', dt)]) 74 | 75 | # Log model 76 | mlflow.sklearn.log_model(pipeline, "sklearn-model") 77 | register_model(run_id, "sklearn-model", "sklearn-monitor") 78 | 79 | # Log custom model 80 | path = "conda_custom.yaml" 81 | with open(path, "r") as f: 82 | dct = yaml.safe_load(f) 83 | mlflow.pyfunc.log_model("sklearn-model-custom", python_model=CustomModel(pipeline), conda_env=dct) 84 | register_model(run_id, "sklearn-model-custom", "sklearn-monitor-custom") 85 | 86 | return (experiment_id,run_id) 87 | 88 | def register_model(run_id, artifact_model_name, registered_model_name): 89 
| version = mlflow.register_model(model_uri=f"runs:/{run_id}/{artifact_model_name}", name=registered_model_name) 90 | client.transition_model_version_stage( 91 | name=version.name, 92 | version=version.version, 93 | stage="Production", 94 | archive_existing_versions=True) 95 | 96 | @click.command() 97 | @click.option("--experiment_name", help="Experiment name", type=str, default="sklearn_monitor") 98 | @click.option("--data-path", help="Data path", type=str, default="data/wine-quality-white.csv") 99 | @click.option("--max-depth", help="Max depth", type=int, default=None) 100 | @click.option("--max-leaf-nodes", help="Max leaf nodes", type=int, default=None) 101 | def main(experiment_name, data_path, max_depth, max_leaf_nodes): 102 | print("Options:") 103 | for k,v in locals().items(): 104 | print(f" {k}: {v}") 105 | mlflow.set_experiment(experiment_name) 106 | _,run_id = train(data_path, max_depth, max_leaf_nodes) 107 | 108 | if __name__ == "__main__": 109 | main() 110 | --------------------------------------------------------------------------------