├── images
├── proxy.png
└── custom_model.png
├── conda.yaml
├── conda_custom.yaml
├── proxy_server.py
├── README.md
└── custom_model_train.py
/images/proxy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amesar/mlflow-model-monitoring/HEAD/images/proxy.png
--------------------------------------------------------------------------------
/images/custom_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amesar/mlflow-model-monitoring/HEAD/images/custom_model.png
--------------------------------------------------------------------------------
/conda.yaml:
--------------------------------------------------------------------------------
1 | name: mlflow-model-monitoring
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.7.6
6 | - pip:
7 | - flask
8 | - requests
9 | - mlflow>=1.18.0
10 | - scikit-learn>=0.24.2
11 |
--------------------------------------------------------------------------------
/conda_custom.yaml:
--------------------------------------------------------------------------------
1 | name: mlflow-model-monitoring-custom
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.7.6
6 | - pip:
7 | - mlflow
8 | - scikit-learn>=0.24.2
9 | - cloudpickle==1.6.0
10 |
--------------------------------------------------------------------------------
/proxy_server.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import uuid
4 | import requests
5 | import logging
6 | logging.basicConfig(level=logging.DEBUG)
7 | import json
8 | import click
9 |
10 | from flask import Flask, request
11 | app = Flask(__name__)
12 | data_type = "application/json"
13 |
14 | _log_dir = None
15 | _mlflow_model_server_uri = None
16 |
def write_records(inp, out, log_dir):
    """Write the inputs and predictions of one request to a new CSV file.

    The file is named with a random UUID and created inside ``log_dir``
    (the directory is created when missing). The header row is
    ``prediction`` followed by ``inp["columns"]``; every following row is a
    prediction followed by the input row it was paired with.

    Args:
        inp: Request payload holding a "columns" list and a "data" list of
            rows. It is only read, never modified.
        out: Iterable of predictions, paired with ``inp["data"]`` by position.
        log_dir: Directory that receives the CSV file.
    """
    import csv  # local import: csv is not among the file-level imports

    os.makedirs(log_dir, exist_ok=True)
    opath = os.path.join(log_dir, str(uuid.uuid4()) + ".csv")
    # newline="" hands line-ending control to the csv writer, which is told
    # to emit plain "\n" so the output matches the previous format.
    with open(opath, "w", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["prediction", *inp["columns"]])
        for pred, row in zip(out, inp["data"]):
            # Build a fresh list instead of inserting into `row`, so the
            # caller's request payload is left untouched. The csv writer
            # quotes fields that contain commas or quotes.
            writer.writerow([str(pred), *(str(x) for x in row)])
28 |
def call_mlflow_model_server(data, timeout=60):
    """Forward a scoring request to the MLflow model server.

    Args:
        data: JSON-serializable request payload.
        timeout: Seconds to wait for the model server before giving up.
            Without a timeout, ``requests`` may block indefinitely.

    Returns:
        The model server's response body decoded from JSON.

    Raises:
        requests.HTTPError: If the model server answers with a 4xx/5xx status.
        requests.Timeout: If the model server does not answer in time.
    """
    headers = {"accept": data_type, "Content-Type": data_type}
    rsp = requests.post(
        url=_mlflow_model_server_uri,
        data=json.dumps(data),
        allow_redirects=True,
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on an error status rather than treating the error body as
    # a list of predictions.
    rsp.raise_for_status()
    return json.loads(rsp.text)
33 |
@app.route("/invocations", methods=["POST"])
def process():
    """Proxy a scoring request to the model server and log the exchange.

    The JSON request body is forwarded to the MLflow model server, the
    request and the returned predictions are written to the log directory,
    and the predictions are sent back to the caller as JSON.
    """
    inp = request.json
    out = call_mlflow_model_server(inp)
    write_records(inp, out, _log_dir)
    # Declare the JSON content type explicitly; a bare string response would
    # be served with Flask's default text/html type.
    return json.dumps(out), 200, {"Content-Type": data_type}
40 |
41 |
@click.command()
@click.option("--port", help="Port", type=int, required=True)
@click.option("--mlflow-model-server-uri", help="MLflow model server URI", type=str, required=True)
# Both --log-dir and --log_dir are accepted so either spelling works.
@click.option("--log-dir", "--log_dir", "log_dir", help="Log directory", default="tmp", type=str)
def main(port, mlflow_model_server_uri, log_dir):
    """Start the logging proxy in front of an MLflow model server.

    Args:
        port: Port the proxy listens on.
        mlflow_model_server_uri: URI of the model server's scoring endpoint.
        log_dir: Directory that receives one CSV file per request.
    """
    print("Options:")
    for k, v in locals().items():
        print(f" {k}: {v}")
    # The request handlers read their configuration from module globals.
    global _log_dir, _mlflow_model_server_uri
    _mlflow_model_server_uri = mlflow_model_server_uri
    _log_dir = log_dir
    # NOTE(review): debug=True turns on Flask's reloader and interactive
    # debugger; suitable for local experiments only.
    app.run(debug=True, port=port)


if __name__ == "__main__":
    main()
58 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MLflow model monitoring example
2 |
3 | A simple example demonstrating how to log request and response (prediction) data for an MLflow model server.
4 | Once the data is logged, a separate process can monitor the logging location and do analytics to determine data drift
5 | and then launch model retraining and redeployment.
6 |
7 | Two ways to log:
8 | * With proxy server that logs the data. Only `split-orient` JSON input is currently supported.
9 | * With custom [PythonModel](https://www.mlflow.org/docs/latest/models.html#custom-python-models) that logs the data.
10 |
11 | The data is currently logged to local disk. Next TODO is to log it to cloud storage (S3).
12 |
13 | ## Setup
14 |
15 | ```
16 | conda env create --file conda.yaml
17 | conda activate mlflow-model-monitoring
18 | ```
19 | ## Train and register two models
20 |
21 | ```
22 | python custom_model_train.py
23 | ```
24 |
25 | This will create two registered models: `sklearn-monitor` and `sklearn-monitor-custom`.
26 |
27 | ## Proxy Server
28 |
29 | The proxy server forwards the request to the actual model server, and then logs the input and output data as a CSV file.
30 |
31 |
32 |
33 | Source code: [proxy_server.py](proxy_server.py).
34 |
35 | Start model server.
36 | ```
37 | mlflow models serve --port 5002 --model-uri models:/sklearn-monitor/production
38 | ```
39 |
40 | Start proxy server.
41 | ```
42 | python proxy_server.py --port 5001 --mlflow-model-server-uri http://localhost:5002/invocations --log-dir out
43 | ```
44 |
45 | ## Custom PythonModel
46 |
47 |
48 |
49 | Source code: [custom_model_train.py](custom_model_train.py).
50 |
51 | Start model server.
52 | ```
53 | export MLFLOW_MONITORING_DIR=tmp
54 | mlflow models serve --port 5001 --model-uri models:/sklearn-monitor-custom/production
55 | ```
56 |
57 | ## Predictions
58 |
59 | ```
60 | curl -X POST \
61 | -H "accept: application/json" \
62 | -H "Content-Type:application/json" \
63 | -d '{ "columns": [ "fixed acidity", "volatile acidity", "citric acid", "residual sugar", "chlorides",
64 | "free sulfur dioxide", "total sulfur dioxide", "density", "pH",
65 | "sulphates", "alcohol" ],
66 | "data": [
67 | [ 7, 0.27, 0.36, 20.7, 0.045, 45, 170, 1.001, 3, 0.45, 8.8 ],
68 | [ 6.3, 0.3, 0.34, 1.6, 0.049, 14, 132, 0.994, 3.3, 0.49, 9.5 ] ] }' \
69 | http://localhost:5001/invocations
70 | ```
71 |
72 | ## Logging directory
73 |
74 | Each request for scoring will generate a CSV file containing the input data and the prediction.
75 | The following example shows data for three requests.
76 | The CSV file is written to the local directory specified by `--log-dir` (proxy server) or by the `MLFLOW_MONITORING_DIR` environment variable (custom model).
77 |
78 | Note that writing to anything except the local filesystem (e.g. S3) is not viable.
79 | Specifying configuration and credentials is not practical with the custom model.
80 | Also, you cannot write to Spark or Delta tables since the Spark context is not available inside the custom model.
81 |
82 | ```
83 | ls -l out
84 |
85 | 74b9c823-fd32-493e-8cdd-45834369c506.csv
86 | 8d55b5e6-7693-49c0-8f69-2cc2ae0b2f6b.csv
87 | e0d90bb3-b528-43ef-82d9-2ca14fcac266.csv
88 | ```
89 |
90 | ```
91 | cat 74b9c823-fd32-493e-8cdd-45834369c506.csv
92 |
93 | prediction,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
94 | 6.0,7,0.27,0.36,20.7,0.045,45,170,1.001,3,0.45,8.8
95 | 6.0,6.3,0.3,0.34,1.6,0.049,14,132,0.994,3.3,0.49,9.5
96 | 5.0,8.1,0.28,0.4,6.9,0.05,30,97,0.9951,3.26,0.44,10.1
97 | ```
98 |
99 |
--------------------------------------------------------------------------------
/custom_model_train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import uuid
3 | import pandas as pd
4 | import numpy as np
5 | import yaml
6 | import click
7 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
8 | from sklearn.model_selection import train_test_split
9 | from sklearn.tree import DecisionTreeRegressor
10 | import mlflow
11 | import mlflow.sklearn
12 |
13 | print("MLflow Version:", mlflow.__version__)
14 | print("MLflow Tracking URI:", mlflow.get_tracking_uri())
15 | client = mlflow.tracking.MlflowClient()
16 |
class CustomModel(mlflow.pyfunc.PythonModel):
    """Python model wrapper that logs every scoring call to a CSV file.

    Each call to ``predict`` writes the input data together with the
    predictions to a new UUID-named CSV file in the directory given by the
    MLFLOW_MONITORING_DIR environment variable (default "out").
    """

    def __init__(self, model):
        """Store the wrapped model; it must expose ``predict(data)``."""
        self.model = model

    def predict(self, context, data):
        """Score ``data``, log inputs and predictions, return the predictions.

        Args:
            context: Model context passed by the caller; not used here.
            data: Input data; it must support ``copy()``, ``insert()`` and
                ``to_csv()`` (presumably a pandas DataFrame).

        Returns:
            The predictions produced by the wrapped model.
        """
        predictions = self.model.predict(data)
        # Log from a copy so the caller's data does not gain a
        # "prediction" column as a side effect of scoring.
        logged = data.copy()
        logged.insert(0, "prediction", predictions.tolist())
        out_dir = os.environ.get("MLFLOW_MONITORING_DIR", "out")
        os.makedirs(out_dir, exist_ok=True)
        path = os.path.join(out_dir, str(uuid.uuid4()) + ".csv")
        with open(path, "w") as f:
            logged.to_csv(f, index=False)
        return predictions
29 |
def build_data(data_path):
    """Load the CSV at ``data_path`` and split it into train/test sets.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The feature frames
        hold every column except "quality"; the label frames hold only
        "quality".
    """
    label = "quality"
    frame = pd.read_csv(data_path)
    # Fixed seed keeps the 70/30 split reproducible between runs.
    train_part, test_part = train_test_split(frame, test_size=0.30, random_state=2019)
    features_train = train_part.drop(columns=[label])
    features_test = test_part.drop(columns=[label])
    labels_train = train_part[[label]]
    labels_test = test_part[[label]]
    return features_train, features_test, labels_train, labels_test
39 |
def train(data_path, max_depth, max_leaf_nodes):
    """Train a decision tree regressor and register two model flavors.

    Inside a single MLflow run this fits the model, logs its parameters,
    metrics and tags, logs it as an sklearn model (registered as
    "sklearn-monitor") and as a custom Python model (registered as
    "sklearn-monitor-custom").

    Args:
        data_path: Path of the CSV training data.
        max_depth: ``max_depth`` passed to DecisionTreeRegressor.
        max_leaf_nodes: ``max_leaf_nodes`` passed to DecisionTreeRegressor.

    Returns:
        A tuple ``(experiment_id, run_id)`` identifying the MLflow run.
    """
    from sklearn.pipeline import Pipeline

    X_train, X_test, y_train, y_test = build_data(data_path)
    with mlflow.start_run() as run:
        # run_id is the current name of the deprecated run_uuid attribute.
        run_id = run.info.run_id
        experiment_id = run.info.experiment_id
        print("MLflow:")
        print(" run_id:", run_id)
        print(" experiment_id:", experiment_id)
        print(" experiment_name:", client.get_experiment(experiment_id).name)

        # Create model
        dt = DecisionTreeRegressor(max_depth=max_depth, max_leaf_nodes=max_leaf_nodes)
        print("Model:\n ", dt)

        # Fit and predict
        dt.fit(X_train, y_train)
        predictions = dt.predict(X_test)

        # MLflow params
        mlflow.log_param("max_depth", max_depth)
        mlflow.log_param("max_leaf_nodes", max_leaf_nodes)

        # MLflow metrics
        mlflow.log_metric("rmse", np.sqrt(mean_squared_error(y_test, predictions)))
        mlflow.log_metric("r2", r2_score(y_test, predictions))
        mlflow.log_metric("mae", mean_absolute_error(y_test, predictions))

        # MLflow tags
        mlflow.set_tag("data_path", data_path)
        mlflow.set_tag("mlflow_version", mlflow.__version__)

        # Wrap the fitted estimator in a one-step pipeline
        pipeline = Pipeline([('step', dt)])

        # Log and register the plain sklearn model
        mlflow.sklearn.log_model(pipeline, "sklearn-model")
        register_model(run_id, "sklearn-model", "sklearn-monitor")

        # Log and register the custom model, using the conda environment
        # read from conda_custom.yaml in the current working directory.
        path = "conda_custom.yaml"
        with open(path, "r") as f:
            dct = yaml.safe_load(f)
        mlflow.pyfunc.log_model("sklearn-model-custom", python_model=CustomModel(pipeline), conda_env=dct)
        register_model(run_id, "sklearn-model-custom", "sklearn-monitor-custom")

    return (experiment_id, run_id)
87 |
def register_model(run_id, artifact_model_name, registered_model_name):
    """Register a run's model artifact and move the new version to Production.

    Args:
        run_id: ID of the run that logged the model artifact.
        artifact_model_name: Artifact path of the model inside the run.
        registered_model_name: Name to register the model under.
    """
    source_uri = f"runs:/{run_id}/{artifact_model_name}"
    model_version = mlflow.register_model(model_uri=source_uri, name=registered_model_name)
    # Archiving existing versions leaves the new one as the only
    # Production version.
    client.transition_model_version_stage(
        name=model_version.name,
        version=model_version.version,
        stage="Production",
        archive_existing_versions=True,
    )
95 |
@click.command()
# --experiment-name matches the dash style of the other options; the original
# --experiment_name spelling stays accepted for backward compatibility.
@click.option("--experiment-name", "--experiment_name", "experiment_name",
    help="Experiment name", type=str, default="sklearn_monitor")
@click.option("--data-path", help="Data path", type=str, default="data/wine-quality-white.csv")
@click.option("--max-depth", help="Max depth", type=int, default=None)
@click.option("--max-leaf-nodes", help="Max leaf nodes", type=int, default=None)
def main(experiment_name, data_path, max_depth, max_leaf_nodes):
    """Train and register the models under the given MLflow experiment.

    Args:
        experiment_name: MLflow experiment to log the run under.
        data_path: Path of the CSV training data.
        max_depth: Maximum tree depth (None for no limit).
        max_leaf_nodes: Maximum number of leaf nodes (None for no limit).
    """
    print("Options:")
    for k, v in locals().items():
        print(f" {k}: {v}")
    mlflow.set_experiment(experiment_name)
    train(data_path, max_depth, max_leaf_nodes)


if __name__ == "__main__":
    main()
110 |
--------------------------------------------------------------------------------