├── .python-version ├── core ├── prediction.py ├── preprocessing.py ├── trainer.py ├── data_loader.py └── evaluator.py ├── .gitignore ├── config └── hyperparams.py ├── pyproject.toml ├── inference └── inference_pipeline.py ├── pipeline └── run_experiment.py ├── view └── confusion_matrix.py ├── main.py ├── bad-code ├── no-mlflow.py └── with-mlflow.py ├── logger └── mlflow_logger.py └── README.md /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /core/prediction.py: -------------------------------------------------------------------------------- 1 | def predict(model, processed_data): 2 | ... -------------------------------------------------------------------------------- /core/preprocessing.py: -------------------------------------------------------------------------------- 1 | def preprocessing_pipeline(data): 2 | ... -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | model/ 2 | mlruns/ 3 | .venv/ 4 | uv.lock 5 | __pycache__ 6 | plots/ -------------------------------------------------------------------------------- /config/hyperparams.py: -------------------------------------------------------------------------------- 1 | HYPERPARAMS = { 2 | "objective": "multi:softprob", 3 | "num_class": 3, 4 | "learning_rate": 0.3, 5 | "eval_metric": "mlogloss", 6 | "colsample_bytree": 0.7, 7 | "subsample": 1, 8 | "seed": 42, 9 | } 10 | -------------------------------------------------------------------------------- /core/trainer.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | 3 | 4 | def train_model(params: dict, X_train, y_train): 5 | dtrain = xgb.DMatrix(X_train, label=y_train) 6 | model = xgb.train(params, dtrain, evals=[(dtrain, "train")], verbose_eval=False) 7 | return model 8 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "poc-mlflow" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [ 8 | "black>=25.1.0", 9 | "isort>=6.0.1", 10 | "mlflow>=2.21.3", 11 | "seaborn>=0.13.2", 12 | "xgboost>=3.0.0", 13 | ] 14 | -------------------------------------------------------------------------------- /inference/inference_pipeline.py: -------------------------------------------------------------------------------- 1 | import mlflow 2 | 3 | from core.evaluator import evaluate_model 4 | from core.trainer import train_model 5 | from core.preprocessing import preprocessing_pipeline 6 | from core.prediction import predict 7 | 8 | def inference_pipeline( 9 | data, model 10 | ) -> dict: 11 | processed_data = preprocessing_pipeline(data) 12 | y = predict(model, processed_data) 13 | 14 | return {"prediction": y} 15 | -------------------------------------------------------------------------------- /core/data_loader.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | def load_data(): 6 | iris = datasets.load_iris() 7 | return iris.data, iris.target, iris.target_names 8 | 9 | 10 | def prepare_data(X, y, target_names, test_size=0.21, random_state=31): 11 | return ( 12 | train_test_split(X, y, test_size=test_size, random_state=random_state), 13 | target_names, 14 | ) 15 | -------------------------------------------------------------------------------- /pipeline/run_experiment.py: -------------------------------------------------------------------------------- 1 | import mlflow 2 | 3 | from core.evaluator import evaluate_model 4 | from core.trainer import train_model 5 | 6 | 7 | def run_pipeline( 8 | X_train, X_test, y_train, y_test, target_names: list[str], params: dict 9 | ) -> dict: 10 | model = train_model(params, X_train, y_train) 11 | metrics = evaluate_model(model, X_test, y_test) 12 | 13 | return {"model": model, "metrics": metrics, "target_names": target_names} 14 | -------------------------------------------------------------------------------- /core/evaluator.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | from sklearn.metrics import accuracy_score, confusion_matrix, log_loss 3 | 4 | 5 | def evaluate_model(model, X_test, y_test): 6 | dtest = xgb.DMatrix(X_test) 7 | y_proba = model.predict(dtest) 8 | y_pred = y_proba.argmax(axis=1) 9 | 10 | metrics = { 11 | "accuracy": accuracy_score(y_test, y_pred), 12 | "log_loss": log_loss(y_test, y_proba), 13 | "confusion_matrix": confusion_matrix(y_test, y_pred), 14 | "y_pred": y_pred, 15 | "y_proba": y_proba, 16 | } 17 | 18 | return metrics 19 | -------------------------------------------------------------------------------- /view/confusion_matrix.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | import seaborn as sns 6 | from mlflow.xgboost import log_model 7 | from sklearn.metrics import confusion_matrix 8 | 9 | 10 | def save_confusion_matrix_plot(y_true, y_pred, labels, path: str): 11 | cm = confusion_matrix(y_true, y_pred) 12 | df_cm = pd.DataFrame(cm, index=labels, columns=labels) 13 | plt.figure(figsize=(6, 4)) 14 | sns.heatmap(df_cm, annot=True, fmt="d", cmap="Blues") 15 | plt.ylabel("Actual") 16 | plt.xlabel("Predicted") 17 | plt.tight_layout() 18 | os.makedirs(os.path.dirname(path), exist_ok=True) 19 | plt.savefig(path) 20 | plt.close() 21 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from config.hyperparams import HYPERPARAMS 2 | from core.data_loader import load_data, prepare_data 3 | from logger.mlflow_logger import persist_results 4 | from pipeline.run_experiment import run_pipeline 5 | 6 | 7 | def main(): 8 | X, y, target_names = load_data() 9 | 10 | (X_train, X_test, y_train, y_test), target_names = prepare_data(X, y, target_names) 11 | 12 | results = run_pipeline( 13 | X_train=X_train, 14 | X_test=X_test, 15 | y_train=y_train, 16 | y_test=y_test, 17 | target_names=target_names, 18 | params=HYPERPARAMS, 19 | ) 20 | 21 | # persist results 22 | persist_status = persist_results( 23 | model=results["model"], 24 | metrics=results["metrics"], 25 | y_test=y_test, 26 | target_names=target_names, 27 | params=PARAMS, 28 | ) 29 | 30 | print("Persist status:", persist_status["status"]) 31 | print("Message:", persist_status["message"]) 32 | print("Run ID", persist_status["run_id"]) 33 | print("Done!") 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /bad-code/no-mlflow.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import xgboost as xgb 4 | from sklearn import datasets 5 | from sklearn.metrics import accuracy_score, confusion_matrix, log_loss 6 | from sklearn.model_selection import train_test_split 7 | 8 | # load dataset 9 | iris = datasets.load_iris() 10 | 11 | X = iris.data 12 | y = iris.target 13 | 14 | # split dataset 15 | X_train, X_test, y_train, y_test = train_test_split( 16 | X, y, test_size=0.21, random_state=31 17 | ) 18 | dtrain = xgb.DMatrix(X_train, label=y_train) 19 | dtest = xgb.DMatrix(X_test, label=y_test) 20 | 21 | # get parameters 22 | params = { 23 | "objective": "multi:softprob", 24 | "num_class": 3, 25 | "learning_rate": 0.3, 26 | "eval_metric": "mlogloss", 27 | "colsample_bytree": 0.7, 28 | "subsample": 1, 29 | "seed": 42, 30 | } 31 | 32 | # set model 33 | model = xgb.train(params, dtrain, evals=[(dtrain, "train")], verbose_eval=False) 34 | 35 | # get prediction 36 | y_proba = model.predict(dtest) 37 | y_pred = y_proba.argmax(axis=1) 38 | 39 | # get metrics 40 | loss = log_loss(y_test, y_proba) 41 | acc = accuracy_score(y_test, y_pred) 42 | 43 | # print metrics 44 | print(f"Metrics:\nLoss: {loss}\nAccuracy: {acc}") 45 | print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}") 46 | 47 | # save model 48 | os.makedirs("model", exist_ok=True) 49 | model.save_model("model/iris_xgboost.json") 50 | -------------------------------------------------------------------------------- /logger/mlflow_logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import mlflow 4 | from mlflow.xgboost import log_model 5 | 6 | from view.confusion_matrix import save_confusion_matrix_plot 7 | 8 | 9 | def log_params(params: dict): 10 | for key, value in params.items(): 11 | mlflow.log_param(key, value) 12 | 13 | 14 | def log_metrics(metrics: dict): 15 | to_log = {k: v for k, v in metrics.items() if isinstance(v, (int, float))} 16 | mlflow.log_metrics(to_log) 17 | 18 | 19 | def log_confusion_matrix(y_true, y_pred, labels): 20 | cm_path = "plots/confusion_matrix.png" 21 | save_confusion_matrix_plot(y_true, y_pred, labels, cm_path) 22 | mlflow.log_artifact(cm_path) 23 | 24 | 25 | def persist_results(model, metrics, y_test, target_names, params) -> dict: 26 | try: 27 | with mlflow.start_run(): 28 | run_id = mlflow.active_run().info.run_id 29 | log_params(params) 30 | mlflow.log_param("problem_type", "multiclass") 31 | log_metrics(metrics) 32 | log_confusion_matrix(y_test, metrics["y_pred"], target_names) 33 | log_model(model, artifact_path="model") 34 | 35 | return { 36 | "status": "success", 37 | "message": "Results logged and model saved successfully", 38 | "run_id": run_id, 39 | "logged_metrics": list(metrics.keys()), 40 | } 41 | 42 | except Exception as e: 43 | return { 44 | "status": "failure", 45 | "message": f"Logging failed: {str(e)}", 46 | "error_type": type(e).__name__, 47 | } 48 | -------------------------------------------------------------------------------- /bad-code/with-mlflow.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import mlflow 3 | import pandas as pd 4 | import seaborn as sns 5 | import xgboost as xgb 6 | from sklearn import datasets 7 | from sklearn.metrics import accuracy_score, confusion_matrix, log_loss 8 | from sklearn.model_selection import train_test_split 9 | 10 | # load dataset 11 | iris = datasets.load_iris() 12 | 13 | X = iris.data 14 | y = iris.target 15 | 16 | # split dataset 17 | X_train, X_test, y_train, y_test = train_test_split( 18 | X, y, test_size=0.21, random_state=31 19 | ) 20 | dtrain = xgb.DMatrix(X_train, label=y_train) 21 | dtest = xgb.DMatrix(X_test, label=y_test) 22 | 23 | 24 | # log confusion matrix 25 | def log_confusion_matrix(y_true, y_pred, labels): 26 | cm = confusion_matrix(y_true, y_pred) 27 | df_cm = pd.DataFrame(cm, index=labels, columns=labels) 28 | plt.figure(figsize=(6, 4)) 29 | sns.heatmap(df_cm, annot=True, fmt="d", cmap="Blues") 30 | plt.ylabel("Actual") 31 | plt.xlabel("Predicted") 32 | plt.tight_layout() 33 | cm_path = "confusion_matrix.png" 34 | mlflow.log_artifact(cm_path) 35 | 36 | 37 | with mlflow.start_run(): 38 | # get parameters 39 | params = { 40 | "objective": "multi:softprob", 41 | "num_class": 3, 42 | "learning_rate": 0.3, 43 | "eval_metric": "mlogloss", 44 | "colsample_bytree": 0.7, 45 | "subsample": 1, 46 | "seed": 42, 47 | } 48 | 49 | # set model 50 | model = xgb.train(params, dtrain, evals=[(dtrain, "train")], verbose_eval=False) 51 | 52 | # get prediction 53 | y_proba = model.predict(dtest) 54 | y_pred = y_proba.argmax(axis=1) 55 | 56 | # get metrics 57 | loss = log_loss(y_test, y_proba) 58 | acc = accuracy_score(y_test, y_pred) 59 | 60 | # explicitly log parameter 61 | for key, value in params.items(): 62 | mlflow.log_param(key, value) 63 | mlflow.log_param("problem_type", "multiclass") 64 | 65 | # log metrics 66 | mlflow.log_metrics({"log_loss": loss, "accuracy": acc}) 67 | 68 | # log confusion matrix 69 | log_confusion_matrix(y_test, y_pred, iris.target_names) 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLflow XGBoost Iris Classifier 2 | 3 | This repository implements a modular machine learning pipeline for multi-class classification using the Iris dataset and XGBoost, with full experiment tracking through MLflow. The project adheres to Clean Architecture principles, functional programming practices, and software engineering standards such as SOLID and Separation of Concerns. 4 | 5 | ## Purpose 6 | 7 | The goal is to provide a reproducible, maintainable, and extensible pipeline for model training, evaluation, and logging—suitable for use in both research and production-grade machine learning workflows. 8 | 9 | ## Project Structure 10 | 11 | ``` 12 | POC-MLFLOW/ 13 | ├── bad-code/ 14 | │ ├── no-mlflow.py # Baseline script without MLflow logging 15 | │ └── with-mlflow.py # Equivalent script using MLflow tracking 16 | │ 17 | ├── config/ 18 | │ └── hyperparams.py # Centralized model hyperparameters 19 | │ 20 | ├── core/ 21 | │ ├── data_loader.py # Loads and splits the dataset 22 | │ ├── evaluator.py # Computes evaluation metrics 23 | │ └── trainer.py # Trains the XGBoost model 24 | │ 25 | ├── logger/ 26 | │ └── mlflow_logger.py # Encapsulates MLflow logging logic 27 | │ 28 | ├── pipeline/ 29 | │ └── run_experiment.py # Orchestrates the pure pipeline 30 | │ 31 | ├── model/ 32 | │ └── iris_xgboost.json # Optional: serialized model artifact 33 | │ 34 | ├── plots/ # Temporary directory for confusion matrix plot 35 | │ 36 | ├── view/ 37 | │ └── confusion_matrix.py # Generates confusion matrix figure 38 | │ 39 | ├── mlruns/ # MLflow tracking directory (auto-generated) 40 | ├── main.py # Pipeline entry point 41 | ├── pyproject.toml # Project dependencies and metadata 42 | ├── .gitignore # Files and folders to be excluded from version control 43 | ├── .python-version # Python version specification 44 | └── README.md # Project documentation 45 | 46 | ``` 47 | 48 | ## Features 49 | 50 | - Functional and modular design 51 | - Clean separation of responsibilities across components 52 | - Parameterized configuration (no hardcoded logic inside pipeline) 53 | - Logging of metrics, parameters, and artifacts to MLflow 54 | - Confusion matrix visualization logged without local disk dependency 55 | - Structured error handling and explicit status returns 56 | 57 | ## How to Run 58 | 59 | This project is managed using [`uv`](https://github.com/astral-sh/uv), a fast Python package manager compatible with `pyproject.toml`. 60 | 61 | ### 1. Install `uv` (if not already installed) 62 | 63 | ```bash 64 | pip install uv 65 | ``` 66 | 67 | ### 2. Install project dependencies 68 | 69 | ```bash 70 | uv pip install -r pyproject.toml 71 | ``` 72 | 73 | Alternatively, if you're using `uv` as the main toolchain: 74 | 75 | ```bash 76 | uv venv 77 | uv pip install -e . 78 | ``` 79 | 80 | ### 3. Run the pipeline 81 | 82 | ```bash 83 | python main.py 84 | ``` 85 | 86 | This will: 87 | - Train an XGBoost model on the Iris dataset 88 | - Evaluate it using accuracy and log loss 89 | - Log all metadata to MLflow 90 | - Store the trained model and confusion matrix plot as artifacts 91 | 92 | ## MLflow Tracking 93 | 94 | To start the MLflow UI locally: 95 | 96 | ```bash 97 | mlflow ui 98 | ``` 99 | 100 | Then access the interface at: 101 | 102 | ``` 103 | http://localhost:5000 104 | ``` 105 | 106 | ## Key Logged Components 107 | 108 | - Model parameters (learning rate, subsample, etc.) 109 | - Accuracy and log loss 110 | - Confusion matrix (visualized and logged in memory) 111 | - Trained model (logged via `mlflow.xgboost`) 112 | 113 | ## Architectural Principles 114 | 115 | This project follows the following software design principles: 116 | 117 | - **Single Responsibility**: Each module handles one concern 118 | - **Open/Closed**: Logic can be extended (e.g., new models, metrics) without modification 119 | - **Functional Core**: The core pipeline is pure and stateless 120 | - **Isolated Side Effects**: Only explicitly defined logging modules perform I/O 121 | - **Testability**: Each function can be unit tested in isolation 122 | 123 | ## Future Improvements 124 | 125 | - Add test coverage with `pytest` 126 | - Abstract model training into a strategy pattern 127 | - Extend to support additional datasets or model types (e.g., LightGBM, RandomForest) 128 | - Integrate MLflow model registry and serving infrastructure 129 | 130 | ## License 131 | 132 | This project is licensed under the MIT License. --------------------------------------------------------------------------------