├── images
    ├── proxy.png
    └── custom_model.png
├── conda.yaml
├── conda_custom.yaml
├── proxy_server.py
├── README.md
└── custom_model_train.py


/images/proxy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amesar/mlflow-model-monitoring/HEAD/images/proxy.png


--------------------------------------------------------------------------------
/images/custom_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amesar/mlflow-model-monitoring/HEAD/images/custom_model.png


--------------------------------------------------------------------------------
/conda.yaml:
--------------------------------------------------------------------------------
 1 | name: mlflow-model-monitoring
 2 | channels:
 3 |   - conda-forge
 4 | dependencies:
 5 |   - python=3.7.6
 6 |   - pip:
 7 |     - flask
 8 |     - requests
 9 |     - mlflow>=1.18.0
10 |     - scikit-learn>=0.24.2
11 | 


--------------------------------------------------------------------------------
/conda_custom.yaml:
--------------------------------------------------------------------------------
 1 | name: mlflow-model-monitoring-custom
 2 | channels:
 3 |   - conda-forge
 4 | dependencies:
 5 |   - python=3.7.6
 6 |   - pip:
 7 |     - mlflow
 8 |     - scikit-learn>=0.24.2
 9 |     - cloudpickle==1.6.0
10 | 


--------------------------------------------------------------------------------
/proxy_server.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import uuid
 4 | import requests
 5 | import logging
 6 | logging.basicConfig(level=logging.DEBUG)
 7 | import json
 8 | import click
 9 | 
10 | from flask import Flask, request
# Flask application that exposes the proxy's /invocations endpoint.
app = Flask(__name__)
# Media type sent as both the accept and Content-Type header to the model server.
data_type = "application/json"

# Runtime settings; assigned by main() from the command-line options.
_log_dir = None
_mlflow_model_server_uri = None
16 | 
def write_records(inp, out, log_dir):
    """Write one CSV file pairing each prediction with its input row.

    The file is created under ``log_dir`` (created if missing) and named with
    a random UUID. Its first column is ``prediction``; the remaining columns
    are the request's feature columns.

    Args:
        inp: Request payload with a ``columns`` list and a ``data`` list of
            rows. It is read only; the caller's rows are not modified.
        out: Iterable of predictions, paired positionally with ``inp["data"]``.
        log_dir: Directory that receives the CSV file.
    """
    import csv  # local import so the block does not depend on a file-level import

    os.makedirs(log_dir, exist_ok=True)
    header = ["prediction", *inp["columns"]]
    opath = os.path.join(log_dir, str(uuid.uuid4()) + ".csv")
    # newline="" lets the csv module control line endings, as its docs require.
    with open(opath, "w", newline="") as f:
        # csv.writer quotes values containing commas, quotes or newlines,
        # which a plain ",".join would silently turn into a corrupt row.
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(header)
        for pred, row in zip(out, inp["data"]):
            # Build a new list instead of inserting into `row`, so the request
            # payload handed in by the caller stays untouched.
            writer.writerow([str(pred), *(str(x) for x in row)])
28 | 
def call_mlflow_model_server(data, timeout=60):
    """Forward a scoring request to the MLflow model server and return its reply.

    Args:
        data: JSON-serializable request payload, sent as the POST body.
        timeout: Seconds to wait for the model server before giving up.
            Without a timeout a hung model server would block the proxy
            indefinitely.

    Returns:
        The model server's response body, decoded from JSON.

    Raises:
        requests.HTTPError: If the model server answers with a 4xx/5xx status.
        requests.Timeout: If the model server does not answer within ``timeout``.
    """
    headers = {"accept": data_type, "Content-Type": data_type}
    rsp = requests.post(
        url=_mlflow_model_server_uri,
        data=json.dumps(data),
        allow_redirects=True,
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly instead of handing an error payload on as if it were predictions.
    rsp.raise_for_status()
    return json.loads(rsp.text)
33 | 
@app.route("/invocations", methods=["POST"])
def process():
    """Score a request through the model server, log it, and return the result.

    The JSON request body is forwarded to the MLflow model server, the
    request/response pair is written to the log directory, and the model
    server's reply is returned to the caller as JSON.
    """
    inp = request.json
    out = call_mlflow_model_server(inp)
    write_records(inp, out, _log_dir)
    # Declare the body as JSON; returning a bare string would be served as text/html.
    return app.response_class(json.dumps(out), mimetype=data_type)
40 | 
41 | 
@click.command()
@click.option("--port", help="Port", type=int, required=True)
@click.option("--mlflow-model-server-uri", help="MLflow model server URI", type=str, required=True)
# Both spellings are accepted; the first one determines the parameter name.
@click.option("--log-dir", "--log_dir", help="Log directory", default="tmp", type=str)
# Debug mode turns on the interactive debugger and reloader, so it is opt-in.
@click.option("--debug/--no-debug", help="Run Flask in debug mode (development only)", default=False)
def main(port, mlflow_model_server_uri, log_dir, debug):
    """Start the logging proxy in front of an MLflow model server."""
    print("Options:")
    for k, v in locals().items():
        print(f"  {k}: {v}")
    # The request handlers read these module-level settings.
    global _log_dir, _mlflow_model_server_uri
    _mlflow_model_server_uri = mlflow_model_server_uri
    _log_dir = log_dir
    app.run(debug=debug, port=port)


if __name__ == '__main__':
    main()
58 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MLflow model monitoring example
 2 | 
 3 | A simple example demonstrating how to log request and response (prediction) data for an MLflow model server.
 4 | Once the data is logged, a separate process can monitor the logging location and do analytics to determine data drift 
 5 | and then launch model retraining and redeployment.
 6 | 
 7 | Two ways to log:
 8 | * With proxy server that logs the data. Only `split-orient` JSON input is currently supported.
 9 | * With custom [PythonModel](https://www.mlflow.org/docs/latest/models.html#custom-python-models) that logs the data.
10 | 
11 | The data is currently logged to local disk. Next TODO is to log it to cloud storage (S3).
12 | 
13 | ## Setup
14 | 
15 | ```
16 | conda env create --file conda.yaml
17 | conda activate mlflow-model-monitoring
18 | ```
19 | ## Train and register two models
20 | 
21 | ```
22 | python custom_model_train.py
23 | ```
24 | 
25 | This will create two registered models: `sklearn-monitor` and `sklearn-monitor-custom`.
26 | 
27 | ## Proxy Server
28 | 
29 | The proxy server forwards the request to the actual model server, and then logs the input and output data as a CSV file.
30 | 
31 | <img src="images/proxy.png" height="220" >
32 | 
33 | Source code: [proxy_server.py](proxy_server.py).
34 | 
35 | Start model server.
36 | ```
37 | mlflow models serve --port 5002 --model-uri models:/sklearn-monitor/production
38 | ```
39 | 
40 | Start proxy server.
41 | ```
42 | python proxy_server.py --port 5001 --mlflow-model-server-uri http://localhost:5002/invocations --log-dir out
43 | ```
44 | 
45 | ## Custom PythonModel 
46 | 
47 | <img src="images/custom_model.png" height="220" >
48 | 
49 | Source code: [custom_model_train.py](custom_model_train.py).
50 | 
51 | Start model server.
52 | ```
53 | export MLFLOW_MONITORING_DIR=tmp
54 | mlflow models serve --port 5001 --model-uri models:/sklearn-monitor-custom/production
55 | ```
56 | 
57 | ## Predictions
58 | 
59 | ```
60 | curl -X POST \
61 |   -H "accept: application/json" \
62 |   -H "Content-Type:application/json" \
63 |   -d '{ "columns": [ "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
64 |                      "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
65 |                      "pH", "sulphates", "alcohol" ],
66 |         "data": [
67 |           [ 7,   0.27, 0.36, 20.7, 0.045, 45, 170, 1.001,  3,    0.45,  8.8 ],
68 |           [ 6.3, 0.3,  0.34,  1.6, 0.049, 14, 132, 0.994,  3.3,  0.49,  9.5 ] ] }' \
69 |   http://localhost:5001/invocations
70 | ```
71 | 
72 | ## Logging directory
73 | 
74 | Each request for scoring will generate a CSV file containing the input data and the prediction. 
75 | The following example shows data for three requests.
76 | The CSV file is written to the local directory given by `--log-dir` (proxy server) or by the `MLFLOW_MONITORING_DIR` environment variable (custom model).
77 | 
78 | Note that with the custom model, writing to anything other than the local filesystem (e.g. S3) is not viable,
79 | because specifying configuration and credentials is not practical inside the custom model.
80 | Also, you cannot write to Spark or Delta tables since the Spark context is not available inside the custom model.
81 | 
82 | ```
83 | ls -l out
84 | 
85 | 74b9c823-fd32-493e-8cdd-45834369c506.csv
86 | 8d55b5e6-7693-49c0-8f69-2cc2ae0b2f6b.csv
87 | e0d90bb3-b528-43ef-82d9-2ca14fcac266.csv
88 | ```
89 | 
90 | ```
91 | cat 74b9c823-fd32-493e-8cdd-45834369c506.csv
92 | 
93 | prediction,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
94 | 6.0,7,0.27,0.36,20.7,0.045,45,170,1.001,3,0.45,8.8
95 | 6.0,6.3,0.3,0.34,1.6,0.049,14,132,0.994,3.3,0.49,9.5
96 | 5.0,8.1,0.28,0.4,6.9,0.05,30,97,0.9951,3.26,0.44,10.1
97 | ```
98 | 
99 | 


--------------------------------------------------------------------------------
/custom_model_train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import uuid
  3 | import pandas as pd
  4 | import numpy as np
  5 | import yaml
  6 | import click
  7 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
  8 | from sklearn.model_selection import train_test_split
  9 | from sklearn.tree import DecisionTreeRegressor
 10 | import mlflow
 11 | import mlflow.sklearn
 12 | 
# Report the MLflow version and tracking URI in use when the module is loaded.
print("MLflow Version:", mlflow.__version__)
print("MLflow Tracking URI:", mlflow.get_tracking_uri())
# Tracking client shared by train() and register_model().
client = mlflow.tracking.MlflowClient()
 16 | 
 17 | class CustomModel(mlflow.pyfunc.PythonModel):
 18 |     def __init__(self, model):
 19 |         self.model = model
 20 |     def predict(self, context, data):
 21 |         predictions = self.model.predict(data)
 22 |         data.insert(0, "prediction", predictions.tolist())
 23 |         out_dir = os.environ.get("MLFLOW_MONITORING_DIR","out")
 24 |         os.makedirs(out_dir, exist_ok=True)
 25 |         path = os.path.join(out_dir,str(uuid.uuid4())+".csv")
 26 |         with open(path, "w") as f:
 27 |             data.to_csv(f, index=False)
 28 |         return predictions 
 29 | 
 30 | def build_data(data_path):
 31 |     col_label = "quality"
 32 |     data = pd.read_csv(data_path)
 33 |     train, test = train_test_split(data, test_size=0.30, random_state=2019)
 34 |     X_train = train.drop([col_label], axis=1)
 35 |     X_test = test.drop([col_label], axis=1)
 36 |     y_train = train[[col_label]]
 37 |     y_test = test[[col_label]]
 38 |     return X_train, X_test, y_train, y_test 
 39 | 
 40 | def train(data_path, max_depth, max_leaf_nodes):
 41 |     X_train, X_test, y_train, y_test = build_data(data_path)
 42 |     with mlflow.start_run() as run:
 43 |         run_id = run.info.run_uuid
 44 |         experiment_id = run.info.experiment_id
 45 |         print("MLflow:")
 46 |         print("  run_id:", run_id)
 47 |         print("  experiment_id:", experiment_id)
 48 |         print("  experiment_name:", client.get_experiment(experiment_id).name)
 49 | 
 50 |         # Create model
 51 |         dt = DecisionTreeRegressor(max_depth=max_depth, max_leaf_nodes=max_leaf_nodes)
 52 |         print("Model:\n ", dt)
 53 | 
 54 |         # Fit and predict
 55 |         dt.fit(X_train, y_train)
 56 |         predictions = dt.predict(X_test)
 57 | 
 58 |         # MLflow params
 59 |         mlflow.log_param("max_depth", max_depth)
 60 |         mlflow.log_param("max_leaf_nodes", max_leaf_nodes)
 61 | 
 62 |         # MLflow metrics
 63 |         mlflow.log_metric("rmse", np.sqrt(mean_squared_error(y_test, predictions)))
 64 |         mlflow.log_metric("r2", r2_score(y_test, predictions))
 65 |         mlflow.log_metric("mae",  mean_absolute_error(y_test, predictions))
 66 |         
 67 |         # MLflow tags
 68 |         mlflow.set_tag("data_path", data_path)
 69 |         mlflow.set_tag("mlflow_version", mlflow.__version__)
 70 | 
 71 |         # Pipeline
 72 |         from sklearn.pipeline import Pipeline
 73 |         pipeline = Pipeline([('step', dt)])
 74 | 
 75 |         # Log model
 76 |         mlflow.sklearn.log_model(pipeline, "sklearn-model")
 77 |         register_model(run_id, "sklearn-model", "sklearn-monitor")
 78 | 
 79 |         # Log custom model
 80 |         path = "conda_custom.yaml"
 81 |         with open(path, "r") as f:
 82 |             dct = yaml.safe_load(f)
 83 |         mlflow.pyfunc.log_model("sklearn-model-custom", python_model=CustomModel(pipeline), conda_env=dct)
 84 |         register_model(run_id, "sklearn-model-custom", "sklearn-monitor-custom")
 85 | 
 86 |     return (experiment_id,run_id)
 87 | 
 88 | def register_model(run_id, artifact_model_name, registered_model_name):
 89 |     version = mlflow.register_model(model_uri=f"runs:/{run_id}/{artifact_model_name}", name=registered_model_name)
 90 |     client.transition_model_version_stage(
 91 |         name=version.name,
 92 |         version=version.version,
 93 |         stage="Production",
 94 |         archive_existing_versions=True)
 95 | 
 96 | @click.command()
 97 | @click.option("--experiment_name", help="Experiment name", type=str, default="sklearn_monitor")
 98 | @click.option("--data-path", help="Data path", type=str, default="data/wine-quality-white.csv")
 99 | @click.option("--max-depth", help="Max depth", type=int, default=None)
100 | @click.option("--max-leaf-nodes", help="Max leaf nodes", type=int, default=None)
101 | def main(experiment_name, data_path, max_depth, max_leaf_nodes):
102 |     print("Options:")
103 |     for k,v in locals().items():
104 |         print(f"  {k}: {v}")
105 |     mlflow.set_experiment(experiment_name)
106 |     _,run_id =  train(data_path, max_depth, max_leaf_nodes)
107 | 
108 | if __name__ == "__main__":
109 |     main()
110 | 


--------------------------------------------------------------------------------