├── .gitignore ├── Makefile ├── README.md ├── docker ├── .env.example ├── docker-compose.yml ├── minio │ └── create-bucket.sh ├── mlflow │ ├── Dockerfile │ └── requirements.txt └── postgres │ └── init.sql ├── docs ├── .gitkeep └── img │ ├── minio_mlflow_screenshot.png │ └── project_banner.png └── tests ├── requirements.txt └── test_mlflow.py /.gitignore: -------------------------------------------------------------------------------- 1 | ## The .gitignore file specifies things that git should ignore. 2 | ## This default template includes entries for R, Python and visual studio 3 | 4 | ## 5 | ## Add custom entries below here. 6 | ## 7 | dst-env/ 8 | .cache/v/cache/lastfailed 9 | tests/.cache/v/cache/lastfailed 10 | .vscode/settings.json 11 | 12 | ## 13 | ## Python Section - See https://github.com/github/gitignore/blob/master/Python.gitignore 14 | ## 15 | 16 | # PyCharm ide files 17 | .idea 18 | 19 | # Byte-compiled / optimized / DLL files 20 | __pycache__/ 21 | *.py[cod] 22 | *$py.class 23 | 24 | # C extensions 25 | *.so 26 | 27 | # Distribution / packaging 28 | .Python 29 | env/ 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest
50 | *.spec
51 | 
52 | # Installer logs
53 | pip-log.txt
54 | pip-delete-this-directory.txt
55 | 
56 | # Unit test / coverage reports
57 | htmlcov/
58 | .tox/
59 | .coverage
60 | .coverage.*
61 | .cache
62 | nosetests.xml
63 | coverage.xml
64 | *.cover
65 | .hypothesis/
66 | 
67 | # Sphinx documentation
68 | docs/_build/
69 | 
70 | # PyBuilder
71 | target/
72 | 
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 | 
76 | # pyenv
77 | .python-version
78 | 
79 | # dotenv
80 | .env
81 | 
82 | # virtualenv
83 | .venv
84 | venv/
85 | ENV/
86 | 
87 | # mypy
88 | .mypy_cache/
89 | 
90 | ###### Project-specific
91 | # Data
92 | data/raw/*
93 | !data/raw/.gitkeep
94 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Convenience targets for starting the MLflow stack and smoke-testing it.
2 | DOCKER_DIR=docker
3 | COMPOSE_FILE=$(DOCKER_DIR)/docker-compose.yml
4 | 
5 | # None of these targets produces a file named after itself.
6 | .PHONY: install test clean all
7 | 
8 | # Start all services in the background, reading variables from docker/.env.
9 | install:
10 | 	docker compose \
11 | 		--env-file $(DOCKER_DIR)/.env \
12 | 		--file $(COMPOSE_FILE) \
13 | 		up --detach
14 | 
15 | # Run the smoke test from a dedicated virtualenv. Each recipe line runs in its
16 | # own shell, so `source .venv/bin/activate` would not carry over to the next
17 | # line; the virtualenv's executables are therefore invoked by path.
18 | test:
19 | 	python3 -m venv .venv
20 | 	.venv/bin/pip install -r tests/requirements.txt
21 | 	.venv/bin/python tests/test_mlflow.py
22 | 
23 | # Stop the services and remove the test virtualenv. `&&` prevents
24 | # `docker compose down` from running in the wrong directory if `cd` fails.
25 | clean:
26 | 	cd $(DOCKER_DIR) && docker compose down
27 | 	rm -rf .venv
28 | 
29 | # Bring the stack up, then run the smoke test against it.
30 | all: install test
31 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![Banner](./docs/img/project_banner.png)
2 | 
3 | # mlflow-setup
4 | 
5 | This repository contains the code for setting up MLFlow Tracking Server with PostgreSQL as backend and MinIO as artifact store, using docker-compose.
6 | 
7 | ## Prerequisites
8 | 
9 | Docker and Docker Compose v2 (the `docker compose` command) should be installed on your machine, either through [Docker Desktop](https://www.docker.com/products/docker-desktop/) or an alternative such as [Orbstack](https://orbstack.dev/).
10 | 
11 | ## Configure environment variables
12 | 
13 | Make a copy of the `docker/.env.example` file and rename it to `docker/.env`. Then, update the environment variables in the `.env` file as per your requirements.
14 | 
15 | ## Build and start the services
16 | 
17 | The compose file and the `.env` file live in the `docker/` directory, so run the command from there:
18 | 
19 | ```bash
20 | cd docker
21 | docker compose up -d --build
22 | ```
23 | 
24 | If everything is set up properly, you should be able to access the services at the following URLs:
25 | 
26 | - MLFlow Tracking Server: [http://localhost:5001](http://localhost:5001)
27 | - MinIO Console UI: [http://localhost:9001](http://localhost:9001)
28 | 
29 | ![Screenshots](./docs/img/minio_mlflow_screenshot.png)
30 | 
--------------------------------------------------------------------------------
/docker/.env.example:
--------------------------------------------------------------------------------
1 | # PostgreSQL
2 | POSTGRES_USER=postgres
3 | POSTGRES_PASSWORD=postgres
4 | POSTGRES_DB=postgres
5 | 
6 | # MinIO
7 | MINIO_ACCESS_KEY=minioadmin
8 | MINIO_SECRET_ACCESS_KEY=minioadmin
9 | MLFLOW_S3_ENDPOINT=http://minio:9000
10 | 
--------------------------------------------------------------------------------
/docker/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.7"
2 | services:
3 |   postgres:  # backend store for the MLflow tracking server
4 |     image: postgres:16  # pinned major version: `latest` can jump to a release that cannot read the existing postgres_data volume
5 |     container_name: postgres
6 |     restart: always
7 |     ports:
8 |       - "5432:5432"
9 |     environment:  # values come from docker/.env
10 |       - POSTGRES_USER=${POSTGRES_USER}
11 |       - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
12 |       - POSTGRES_DB=${POSTGRES_DB}
13 |     volumes:
14 |       - postgres_data:/var/lib/postgresql/data
15 |       - ./postgres/init.sql:/docker-entrypoint-initdb.d/init.sql  # creates the `mlflow` database used by the tracking server
16 | 
17 |   minio:  # S3-compatible artifact store
18 |     restart: always
19 |     image: quay.io/minio/minio
20 | 
container_name: minio 21 | ports: 22 | - "9000:9000" 23 | - "9001:9001" 24 | environment: 25 | - MINIO_ROOT_USER=${MINIO_ACCESS_KEY} 26 | - MINIO_ROOT_PASSWORD=${MINIO_SECRET_ACCESS_KEY} 27 | - MINIO_STORAGE_USE_HTTPS=false 28 | command: server /data --console-address ":9001" 29 | volumes: 30 | - minio_data:/data 31 | 32 | minio-setup: 33 | image: quay.io/minio/mc 34 | depends_on: 35 | - minio 36 | volumes: 37 | - ./minio/create-bucket.sh:/create-bucket.sh 38 | entrypoint: /bin/sh 39 | command: -c "chmod +x /create-bucket.sh && /create-bucket.sh" 40 | environment: 41 | - MINIO_ROOT_USER=${MINIO_ACCESS_KEY} 42 | - MINIO_ROOT_PASSWORD=${MINIO_SECRET_ACCESS_KEY} 43 | 44 | mlflow: 45 | restart: always 46 | build: ./mlflow 47 | image: mlflow_server 48 | container_name: mlflow_server 49 | ports: 50 | - "5001:5000" 51 | environment: 52 | - AWS_ACCESS_KEY_ID=${MINIO_ACCESS_KEY} 53 | - AWS_SECRET_ACCESS_KEY=${MINIO_SECRET_ACCESS_KEY} 54 | - MLFLOW_S3_ENDPOINT_URL=${MLFLOW_S3_ENDPOINT} 55 | - MLFLOW_S3_IGNORE_TLS=true 56 | command: > 57 | mlflow server 58 | --backend-store-uri postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres/mlflow 59 | --host 0.0.0.0 60 | --serve-artifacts 61 | --artifacts-destination s3://mlflow 62 | depends_on: 63 | - postgres 64 | - minio-setup 65 | 66 | volumes: 67 | postgres_data: 68 | minio_data: 69 | -------------------------------------------------------------------------------- /docker/minio/create-bucket.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Configure MinIO Client 3 | mc alias set minioserver http://minio:9000 ${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD} 4 | 5 | # Create the MLFlow bucket 6 | mc mb minioserver/mlflow 7 | -------------------------------------------------------------------------------- /docker/mlflow/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | # Install python package 4 | COPY 
requirements.txt /tmp/ 5 | RUN pip install --no-cache-dir -r /tmp/requirements.txt 6 | -------------------------------------------------------------------------------- /docker/mlflow/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow==2.11.0 2 | psycopg2-binary==2.9.9 3 | boto3==1.34.55 4 | -------------------------------------------------------------------------------- /docker/postgres/init.sql: -------------------------------------------------------------------------------- 1 | -- Create mlflow database 2 | CREATE DATABASE mlflow; 3 | -------------------------------------------------------------------------------- /docs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/violincoding/mlflow-setup/6e4f699b6464a1fa1684687359ca7c83b246d186/docs/.gitkeep -------------------------------------------------------------------------------- /docs/img/minio_mlflow_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/violincoding/mlflow-setup/6e4f699b6464a1fa1684687359ca7c83b246d186/docs/img/minio_mlflow_screenshot.png -------------------------------------------------------------------------------- /docs/img/project_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/violincoding/mlflow-setup/6e4f699b6464a1fa1684687359ca7c83b246d186/docs/img/project_banner.png -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow==2.11.0 2 | scikit-learn==1.4.0 3 | numpy==1.26.4 4 | -------------------------------------------------------------------------------- /tests/test_mlflow.py: -------------------------------------------------------------------------------- 1 | 
import numpy as np 2 | from sklearn.linear_model import LogisticRegression 3 | 4 | import mlflow 5 | import mlflow.sklearn 6 | from mlflow.models import infer_signature 7 | 8 | mlflow.set_tracking_uri("http://localhost:5001") 9 | 10 | if __name__ == "__main__": 11 | with mlflow.start_run(): 12 | X = np.array([-2, -1, 0, 1, 2, 1]).reshape(-1, 1) 13 | y = np.array([0, 0, 1, 1, 1, 0]) 14 | lr = LogisticRegression() 15 | lr.fit(X, y) 16 | score = lr.score(X, y) 17 | print(f"Score: {score}") 18 | mlflow.log_metric("score", score) 19 | predictions = lr.predict(X) 20 | signature = infer_signature(X, predictions) 21 | mlflow.sklearn.log_model(lr, "model", signature=signature) 22 | print(f"Model saved in run {mlflow.active_run().info.run_uuid}") 23 | --------------------------------------------------------------------------------