├── .gitignore
├── Dockerfile
├── README.md
├── docker-compose.yaml
├── on_premises
│   ├── .env
│   ├── Dockerfile
│   ├── README.md
│   ├── docker-compose.yaml
│   ├── example
│   │   ├── config.yaml
│   │   ├── example_elasticnet_wine.py
│   │   └── experiment_recorder.py
│   ├── images
│   │   ├── on_pre_1.png
│   │   └── on_pre_2.png
│   └── start.sh
└── start.sh

/.gitignore:
--------------------------------------------------------------------------------
.env
*.*
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM conda/miniconda3:latest

RUN mkdir -p /mlflow/mlruns

WORKDIR /mlflow

ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

RUN echo "export LC_ALL=$LC_ALL" >> /etc/profile.d/locale.sh
RUN echo "export LANG=$LANG" >> /etc/profile.d/locale.sh

RUN apt-get update && apt-get install -y \
    build-essential \
    python3-dev \
    libpq-dev

RUN pip install -U pip && \
    pip install --ignore-installed google-cloud-storage && \
    pip install psycopg2 mlflow

COPY ./start.sh ./start.sh
RUN chmod +x ./start.sh

EXPOSE 80
EXPOSE 443

CMD ["./start.sh"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# mlflow-docker-compose
Deploy MLflow with docker-compose.

# Deploy
## 1. Log in to Google Cloud Platform
In this setup, MLflow stores artifacts on Google Cloud Storage,
which means you must set up GCP credentials.
If you already have `application_default_credentials.json`, skip to the next chapter.

```sh
$ gcloud auth application-default login
```
`application_default_credentials.json` will be saved to `${HOME}/.config/gcloud/`.


## 2. Create the .env file
In `docker-compose.yaml`, some parameters are loaded from the `.env` file.
Set the following parameters in `.env`:

- HOST: host name (if you don't use a domain, any name is accepted; if you do, specify it)
- POSTGRES_USER: PostgreSQL DB user
- POSTGRES_PASSWORD: PostgreSQL DB user password
- GCP_STORAGE_BUCKET: Google Cloud Storage bucket where MLflow will store artifacts
- CREDENTIALS_PATH: path to `application_default_credentials.json`
- GCLOUD_PROJECT: GCP project name you use

```
HOST=mlflow.dev
POSTGRES_USER=demo-user
POSTGRES_PASSWORD=demo-password
GCP_STORAGE_BUCKET=demo-bucket
CREDENTIALS_PATH=~/.config/gcloud/application_default_credentials.json
GCLOUD_PROJECT=demo-project
```

## 3. Set up NGINX Basic Authentication
Because MLflow doesn't provide authentication, an NGINX proxy is used as a Basic authentication layer.

```sh
$ sudo echo "{USER_NAME}:$(openssl passwd -apr1 {PASSWORD})" >> ${HOST}
```

`${HOST}` is the host name you set in chapter 2.

## 4. Build and deploy
Build the MLflow Dockerfile, and then deploy the applications.

```sh
$ sudo docker-compose build
$ sudo docker-compose up -d
```

# Client
To use Basic authentication, MLflow passes the following parameters via HTTP authentication.
Set the following environment variables locally, with the same values as in [3. Set up NGINX Basic Authentication](#3-Set-up-NGINX-Basic-Authentication):

- MLFLOW_TRACKING_USERNAME
- MLFLOW_TRACKING_PASSWORD

See also https://www.mlflow.org/docs/latest/tracking.html#logging-to-a-tracking-server
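For reference, a minimal client-side sketch (not part of this repo) that logs a run to the deployed server; the host and credentials below are the placeholder values from chapters 2 and 3:

```python
# Minimal client sketch, assuming `pip install mlflow` locally.
# "mlflow.dev", "{USER_NAME}" and "{PASSWORD}" are placeholders for your own values.
import os
import mlflow

os.environ["MLFLOW_TRACKING_USERNAME"] = "{USER_NAME}"   # htpasswd user from chapter 3
os.environ["MLFLOW_TRACKING_PASSWORD"] = "{PASSWORD}"    # htpasswd password from chapter 3
mlflow.set_tracking_uri("https://mlflow.dev")            # the HOST you set in chapter 2

mlflow.set_experiment("smoke-test")
with mlflow.start_run():
    mlflow.log_param("alpha", 0.5)
    mlflow.log_metric("rmse", 0.75)
```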
# Update MLflow version
If you want to update MLflow, stop the container and remove its image, and then rebuild the MLflow container.

```sh
$ sudo docker-compose stop mlflow && \
sudo docker-compose rm mlflow && \
docker images mlflow-docker-compose_mlflow --format '{{.ID}}'|xargs docker rmi && \
sudo docker-compose build && \
sudo docker-compose up -d
```

# On-Premises version

If you'd like to run MLflow on your on-premises server, the `on_premises` folder is for you.
Batteries (sample setting files) included.
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
version: '3'
services:
  waitfordb:
    image: dadarek/wait-for-dependencies
    depends_on:
      - postgresql
    command: postgresql:5432

  postgresql:
    image: postgres:10.5
    container_name: postgresql
    ports:
      - 5432:5432
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: mlflow-db
      POSTGRES_INITDB_ARGS: "--encoding=UTF-8"
    hostname: postgresql
    restart: always

  mlflow:
    build: .
    container_name: mlflow
    expose:
      - 80
      - 443
    depends_on:
      - postgresql
      - waitfordb
    volumes:
      - ${CREDENTIALS_PATH}:/opt/application_default_credentials.json
    environment:
      DB_URI: postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgresql:5432/mlflow-db
      GCP_STORAGE_BUCKET: "${GCP_STORAGE_BUCKET}"
      VIRTUAL_HOST: ${HOST}
      VIRTUAL_PORT: 80
      LETSENCRYPT_HOST: ${HOST}
      LETSENCRYPT_EMAIL: example@gmail.com
      GOOGLE_APPLICATION_CREDENTIALS: /opt/application_default_credentials.json
      GCLOUD_PROJECT: ${GCLOUD_PROJECT}

  nginx-proxy:
    image: jwilder/nginx-proxy
    container_name: nginx-proxy
    restart: always
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./${HOST}:/etc/nginx/htpasswd/${HOST}
      - html:/usr/share/nginx/html
      - dhparam:/etc/nginx/dhparam
      - vhost:/etc/nginx/vhost.d
      - certs:/etc/nginx/certs:ro
      - /var/run/docker.sock:/tmp/docker.sock:ro
      - conf:/etc/nginx/conf.d
    environment:
      DEFAULT_HOST: ${HOST}
      DHPARAM_GENERATION: "false"
      HTTPS_METHOD: noredirect
    labels:
      - "com.github.jrcs.letsencrypt_nginx_proxy_companion.nginx_proxy"


  letsencrypt-nginx-proxy-companion:
    image: jrcs/letsencrypt-nginx-proxy-companion
    container_name: nginx-proxy-lets-encrypt
    restart: always
    depends_on:
      - nginx-proxy
    volumes:
      - conf:/etc/nginx/conf.d
      - certs:/etc/nginx/certs:rw
      - vhost:/etc/nginx/vhost.d
      - html:/usr/share/nginx/html
      - /var/run/docker.sock:/var/run/docker.sock:ro
    environment:
      NGINX_PROXY_CONTAINER: nginx-proxy


volumes:
  certs:
  html:
  vhost:
  dhparam:
  conf:
--------------------------------------------------------------------------------
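Editor's note, not part of the repo: if you want to confirm that run artifacts actually reach the bucket configured above, a short check like the following can help. It assumes `google-cloud-storage` is installed locally; "demo-project" and "demo-bucket" are the placeholder values from the sample `.env`.

```python
# Hypothetical verification script; substitute your GCLOUD_PROJECT / GCP_STORAGE_BUCKET.
import os
from google.cloud import storage

os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    os.path.expanduser("~/.config/gcloud/application_default_credentials.json"))

client = storage.Client(project="demo-project")                  # GCLOUD_PROJECT
for blob in client.list_blobs("demo-bucket", max_results=10):    # GCP_STORAGE_BUCKET
    print(blob.name)                                             # artifact paths written by MLflow runs
```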
/on_premises/.env:
--------------------------------------------------------------------------------
HOST=mlflow-server
MLFLOW_PORT=5000
POSTGRES_USER=mlflow_user
POSTGRES_PASSWORD=mlflow_pwd
ARTIFACT_PATH=/tmp/artifacts
--------------------------------------------------------------------------------
/on_premises/Dockerfile:
--------------------------------------------------------------------------------
FROM conda/miniconda3:latest

RUN mkdir -p /mlflow/mlruns

WORKDIR /mlflow

ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

RUN echo "export LC_ALL=$LC_ALL" >> /etc/profile.d/locale.sh
RUN echo "export LANG=$LANG" >> /etc/profile.d/locale.sh

RUN apt-get update && apt-get install -y \
    build-essential \
    python3-dev \
    libpq-dev

RUN pip install -U pip && \
    pip install --ignore-installed google-cloud-storage && \
    pip install psycopg2 mlflow

COPY ./start.sh ./start.sh
RUN chmod +x ./start.sh

CMD ["./start.sh"]
--------------------------------------------------------------------------------
/on_premises/README.md:
--------------------------------------------------------------------------------
# On-premises Configuration Example

You can quickly set up your MLflow on-premises environment with this example.

[Batteries Included](https://www.python.org/dev/peps/pep-0206/#batteries-included-philosophy): the settings files are basically all done.

## Quickstart

1. Install Docker, then install MLflow and Hydra.

    ```sh
    pip install mlflow hydra-core
    ```

2. Make a folder to store artifacts.

    Edit `.env` if you want to change the folder; it's `/tmp/artifacts` by default.

    ```sh
    mkdir /tmp/artifacts
    ```

3. Get your MLflow server up and running. This takes time.

    ```sh
    docker-compose up --build -d
    ```

4. Confirm your server is running properly.

    Open the server URI. It's `http://your-server-ip-or-host-name:5000/`.

Now let's run an example; this will run for all combinations of the `alpha` and `l1_ratio` parameters.

```sh
cd example
python example_elasticnet_wine.py alpha=0.2,0.3,0.4,0.5,0.6,0.7 l1_ratio=0.4,0.5,0.6 -m
```

If it runs successfully, reload the browser and click on `example_elasticnet_wine`:

![result image](images/on_pre_1.png)

### Where are your artifacts in the browser?

If you click on one of the runs, its detail page will open, and you can find the artifacts at the bottom of the page.

![result image](images/on_pre_2.png)

## Basic design

- The user ID/password are basically fixed; they are used only for PostgreSQL.
- The port is set to 5000 by default.
- The artifact folder is a little tricky: it has to be the same pathname on the local environment and on the server running in Docker, so it's set to `/tmp/artifacts` by default. This is a workaround to follow MLflow's behavior.

Find the settings in `.env` if you need to change them.

- On the client (ML application) side, the `ExperimentRecorder` class wraps the essentials: environment variables, starting an MLflow run, and so on.
- `example/example_elasticnet_wine.py` shows how you can use it in your ML apps; a minimal sketch is shown right below.
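A minimal sketch of that usage (not part of the repo; it assumes the script sits next to `experiment_recorder.py` and `config.yaml`, and is launched through Hydra like the bundled example):

```python
# minimal_usage.py: hypothetical file name; a stripped-down version of
# example_elasticnet_wine.py showing only the recorder workflow.
import hydra
from omegaconf import DictConfig
import mlflow

from experiment_recorder import ExperimentRecorder


@hydra.main(config_path='config.yaml')
def main(cfg: DictConfig) -> None:
    # Sets the MLflow tracking env variables and starts a run (see experiment_recorder.py).
    recorder = ExperimentRecorder('my_experiment', run_name=f'alpha={cfg.alpha}')
    org_dir, run_dir, logger = recorder.get_things()
    logger.info(f'original cwd: {org_dir}, run dir: {run_dir}')

    mlflow.log_metric('dummy_metric', 1.0)   # your training / evaluation goes here
    recorder.log_all_params(cfg)             # records every value in config.yaml

    recorder.end_run()


if __name__ == '__main__':
    main()
```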
## Troubleshooting

Stop the containers first.

```sh
docker-compose down
```

See what's happening by running without `-d`.

```sh
docker-compose up
```

You might see some errors; check them and fix...

## Cleaning Docker-created files

The following will clean up both containers and images.

```sh
docker ps -aq |xargs docker rm
docker images -aq |xargs docker rmi
```

The following will clean up the cache.

```sh
docker system prune -a
```
--------------------------------------------------------------------------------
/on_premises/docker-compose.yaml:
--------------------------------------------------------------------------------
version: '3'
services:
  waitfordb:
    image: dadarek/wait-for-dependencies
    depends_on:
      - postgresql
    command: postgresql:5432

  postgresql:
    image: postgres:10.5
    container_name: postgresql
    ports:
      - 5432:5432
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: mlflow-db
      POSTGRES_INITDB_ARGS: "--encoding=UTF-8"
    hostname: postgresql
    restart: always

  mlflow:
    build: .
    container_name: mlflow
    ports:
      - ${MLFLOW_PORT}:${MLFLOW_PORT}
    depends_on:
      - postgresql
      - waitfordb
    volumes:
      - ${ARTIFACT_PATH}:${ARTIFACT_PATH}
    environment:
      DB_URI: postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgresql:5432/mlflow-db
      VIRTUAL_HOST: ${HOST}
      VIRTUAL_PORT: ${MLFLOW_PORT}
      ARTIFACT_PATH: ${ARTIFACT_PATH}
    command: ./start.sh
    # <for debugging>
    # stdin_open: true
    # tty: true
    # command: /bin/bash
--------------------------------------------------------------------------------
/on_premises/example/config.yaml:
--------------------------------------------------------------------------------
alpha: 0.5
l1_ratio: 0.5
sample:
  level2:
    level3: bar
--------------------------------------------------------------------------------
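Editor's note, not part of the repo: the nested layout in this `config.yaml` is what `ExperimentRecorder.log_all_params()` (defined further below) walks recursively. A quick sketch, assuming `omegaconf` is installed, of the parameter names it would send to MLflow:

```python
# Reproduces the traversal in ExperimentRecorder._explore_recursive, but only
# collects names instead of calling mlflow.log_param, to show the flattened keys.
from omegaconf import DictConfig, ListConfig, OmegaConf

cfg = OmegaConf.load('config.yaml')

def flatten(prefix, element, out):
    if isinstance(element, DictConfig):
        for k, v in element.items():
            if isinstance(v, (DictConfig, ListConfig)):
                flatten(f'{prefix}{k}.', v, out)
            else:
                out[f'{prefix}{k}'] = v
    elif isinstance(element, ListConfig):
        for i, v in enumerate(element):
            out[f'{prefix}{i}'] = v
    return out

print(flatten('', cfg, {}))
# expected: {'alpha': 0.5, 'l1_ratio': 0.5, 'sample.level2.level3': 'bar'}
```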
/on_premises/example/example_elasticnet_wine.py:
--------------------------------------------------------------------------------
"""
Example based on the original mlflow/examples/sklearn_elasticnet_wine/train.py,
edited to explain how to work with these tools.

URL: https://github.com/mlflow/mlflow/blob/master/examples/sklearn_elasticnet_wine/train.py
"""

import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import hydra
from omegaconf import DictConfig

from experiment_recorder import ExperimentRecorder
import mlflow
import mlflow.sklearn


warnings.filterwarnings("ignore")
np.random.seed(40)


def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


def train(cwd, logger, in_alpha, in_l1_ratio):
    """Almost the same as the original example."""

    # Read the wine-quality csv file from the URL
    csv_url =\
        'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
    try:
        data = pd.read_csv(csv_url, sep=';')
    except Exception as e:
        logger.exception(
            "Unable to download training & test CSV, check your internet connection. Error: %s", e)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    # Set default values if no alpha is provided
    if in_alpha is None:
        alpha = 0.5
    else:
        alpha = float(in_alpha)

    # Set default values if no l1_ratio is provided
    if in_l1_ratio is None:
        l1_ratio = 0.5
    else:
        l1_ratio = float(in_l1_ratio)

    # Execute ElasticNet
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    # Evaluate Metrics
    predicted_qualities = lr.predict(test_x)
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    # Print out metrics
    logger.info("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    logger.info("  RMSE: %s" % rmse)
    logger.info("  MAE: %s" % mae)
    logger.info("  R2: %s" % r2)

    return lr, rmse, mae, r2


@hydra.main(config_path='config.yaml')
def main(cfg: DictConfig) -> None:
    """Main part of this example."""

    ### 1. This sets environment variables, starts a run and gets a logger.
    recorder = ExperimentRecorder('example_elasticnet_wine',
                                  run_name=f'alpha={cfg.alpha},l1={cfg.l1_ratio}')
    org_dir, run_dir, logger = recorder.get_things()  # Logging settings are all done by Hydra.
    logger.info(f'cwd was {org_dir}...')              # Note that Hydra changes cwd.
    logger.info(f'running cwd is {run_dir}...')

    # 2. Do your job: this trains a model.
    lr, rmse, mae, r2 = train(org_dir, logger, cfg.alpha, cfg.l1_ratio)

    # 3. Record everything you want to keep: parameters, metrics, and the model, to MLflow.
    recorder.log_all_params(cfg)     # To record all parameters, this is useful.
    mlflow.log_metric("rmse", rmse)  # To record each metric or parameter, use the mlflow API.
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)

    mlflow.sklearn.log_model(lr, "model")

    # Hydra's artifacts.
    mlflow.log_artifact('.hydra/config.yaml')
    mlflow.log_artifact('.hydra/hydra.yaml')
    mlflow.log_artifact('.hydra/overrides.yaml')
    mlflow.log_artifact('example_elasticnet_wine.log')

    # 4. Let's finish.
    recorder.end_run()


if __name__ == "__main__":
    main()
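Editor's note, not part of the repo: `ExperimentRecorder` below defaults to `http://0.0.0.0:5000`, so if the tracking server runs on another machine you can pass the server address explicitly. A small sketch (the host name is a placeholder; user/password are the defaults from `.env`):

```python
# Hypothetical snippet: point the recorder at a remote on-premises server.
recorder = ExperimentRecorder('example_elasticnet_wine',
                              run_name='remote-run',
                              uri='http://your-server-ip-or-host-name:5000',
                              username='mlflow_user',
                              password='mlflow_pwd')
```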
--------------------------------------------------------------------------------
/on_premises/example/experiment_recorder.py:
--------------------------------------------------------------------------------
import os
try:
    from omegaconf import DictConfig, ListConfig
    import mlflow
    import hydra
except ImportError:
    raise Exception('Please install the dependencies first: "pip install mlflow hydra-core".')
import logging


class ExperimentRecorder():
    """A simple MLflow/Hydra wrapper that makes it easy to record experiment details.

    Thanks to https://ymym3412.hatenablog.com/entry/2020/02/09/034644
    """

    def __init__(self, experiment_name, run_name=None,
                 uri='http://0.0.0.0:5000', username='mlflow_user', password='mlflow_pwd'):
        os.environ['MLFLOW_TRACKING_URI'] = uri
        os.environ['MLFLOW_TRACKING_USERNAME'] = username
        os.environ['MLFLOW_TRACKING_PASSWORD'] = password

        mlflow.set_experiment(experiment_name)
        mlflow.start_run(run_name=run_name)

        logging.basicConfig(level=logging.WARN)

    def get_things(self):
        org_dir = hydra.utils.get_original_cwd()
        run_dir = os.path.abspath('.')
        return org_dir, run_dir, logging.getLogger(__name__)

    def log_all_params(self, root_param):
        self._explore_recursive('', root_param)

    def _explore_recursive(self, parent_name, element):
        if isinstance(element, DictConfig):
            for k, v in element.items():
                if isinstance(v, DictConfig) or isinstance(v, ListConfig):
                    self._explore_recursive(f'{parent_name}{k}.', v)
                else:
                    mlflow.log_param(f'{parent_name}{k}', v)
        elif isinstance(element, ListConfig):
            for i, v in enumerate(element):
                mlflow.log_param(f'{parent_name}{i}', v)
        else:
            print('ignored to log param:', element)

    # def log_param(self, key, value):  # --> simply use `mlflow.log_param(...)`
    #     mlflow.log_param(key, value)

    # def log_metric(self, key, value, step=None):
    #     mlflow.log_metric(key, value, step=step)

    # def log_artifact(self, local_path):
    #     mlflow.log_artifact(local_path)

    def end_run(self):
        mlflow.end_run()
--------------------------------------------------------------------------------
/on_premises/images/on_pre_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ymym3412/mlflow-docker-compose/62fb84678e2f42e851e104768802db4d277fe38c/on_premises/images/on_pre_1.png
--------------------------------------------------------------------------------
/on_premises/images/on_pre_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ymym3412/mlflow-docker-compose/62fb84678e2f42e851e104768802db4d277fe38c/on_premises/images/on_pre_2.png
--------------------------------------------------------------------------------
/on_premises/start.sh:
--------------------------------------------------------------------------------
#!/bin/bash

set -o errexit
set -o nounset
set -o pipefail

# uncomment below when you see any issue with db...
# mlflow db upgrade $DB_URI

mlflow server \
    --backend-store-uri $DB_URI \
    --host 0.0.0.0 \
    --port $VIRTUAL_PORT \
    --default-artifact-root $ARTIFACT_PATH
--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
#!/bin/bash

set -o errexit
set -o nounset
set -o pipefail

# uncomment below when you see any issue with db...
# https://github.com/ymym3412/mlflow-docker-compose/issues/4
# mlflow db upgrade $DB_URI

mlflow server \
    --backend-store-uri $DB_URI \
    --host 0.0.0.0 \
    --port 80 \
    --default-artifact-root gs://$GCP_STORAGE_BUCKET
--------------------------------------------------------------------------------