├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── compose_variations
│   ├── README.md
│   ├── docker-compose.mlflow_existingpostgres.yaml
│   ├── docker-compose.mlflow_mysql.yaml
│   ├── docker-compose.mlflow_newpostgres.yaml
│   ├── docker-compose.mlflow_postgres.yaml
│   ├── docker-compose.mlflow_postgres_nginx.yaml
│   ├── docker-compose.mlflow_sqlite.yaml
│   └── docker-compose.orig.yaml
├── docker-compose.yaml
├── mlflow
│   └── Dockerfile
└── nginx
    ├── Dockerfile
    ├── mlflow.conf
    └── nginx.conf
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | __py_cache__ 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2023 A. Ganse
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for testing and populating the docker/mlflow/db setup. 2 | # Warning: 'make clean' will completely empty out your mlflow contents. 3 | 4 | # The MLGWHOST variable is the address for accessing mlflow from inside the 5 | # mlflow_server docker container. 6 | MLGWHOST=$(shell docker inspect -f '{{ .NetworkSettings.Networks.docker_mlflow_db_default.Gateway }}' mlflow_server) 7 | # MLGWHOST=172.17.0.1 # may be ubuntu-specific 8 | # Localhost is used to access mlflow from outside the docker container. 9 | MLFLOW_PORT=5000 10 | 11 | ALPHA = 0.0002 0.002 0.02 0.2 2.0 20.0 200.0 2000.0 12 | L1RATIO = 0.1 0.2 0.3 13 | EXPT = 'Testing1' 14 | 15 | 16 | start: 17 | # Default location in docker-compose.yml for artifact store is docker volume 18 | # but let's set it to local filesystem in makefile here for easy example runs. 19 | docker compose up -d --build 20 | 21 | stop: 22 | docker compose down 23 | 24 | clean: 25 | docker volume rm docker_mlflow_db_datapg_vol 26 | docker volume rm docker_mlflow_db_condaenv_vol 27 | docker volume rm docker_mlflow_db_mlruns_vol 28 | 29 | mlflowquickcheck: 30 | # Simple access check to mlflow server on host port; just lists experiments. 31 | docker exec \ 32 | -e MLFLOW_TRACKING_URI=http://${MLGWHOST}:${MLFLOW_PORT} \ 33 | mlflow_server \ 34 | mlflow experiments search # (in mlflow v1 use 'list', v2 use 'search') 35 | 36 | mlflowpopulate: 37 | # Populates entries in mlflow with the mlflow team's own mlflow-example. 38 | # The first run is slow as conda installs packages to the condaenv_vol volume, 39 | # but later runs are quick since they reuse that same volume.
40 | docker exec \ 41 | -e MLFLOW_TRACKING_URI=http://${MLGWHOST}:${MLFLOW_PORT} \ 42 | mlflow_server \ 43 | mlflow experiments create -n $(EXPT) &> /dev/null || \ 44 | echo Populating pre-existing experiment $(EXPT) 45 | @$(foreach loop_l1ratio, $(L1RATIO), \ 46 | $(foreach loop_alpha, $(ALPHA), \ 47 | echo passing params $(loop_l1ratio) $(loop_alpha) into loop; \ 48 | docker exec \ 49 | -e LD_LIBRARY_PATH=/opt/conda/lib \ 50 | -e MLFLOW_TRACKING_URI=http://${MLGWHOST}:${MLFLOW_PORT} \ 51 | mlflow_server \ 52 | mlflow run /home/mlflow-example \ 53 | -P alpha=$(loop_alpha) \ 54 | -P l1_ratio=$(loop_l1ratio) \ 55 | --experiment-name=$(EXPT) \ 56 | ;) \ 57 | ) 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # docker_mlflow_db 2 | 3 | A ready-to-run Docker container setup to quickly provide MLflow as a service, with an optional 4 | database backend, optional storage of artifacts in AWS S3, and a reverse-proxy 5 | frontend which allows one to easily implement basic or secure authentication. 6 | 7 | > 8 | > :bulb: Note this repo is part of a trio that you might find useful together 9 | > (but all are separate tools that can be used independently): 10 | > 11 | > * [aganse/docker_mlflow_db](https://github.com/aganse/docker_mlflow_db): 12 | > ready-to-run MLflow server with PostgreSQL, AWS S3, Nginx 13 | > 14 | > * [aganse/py_tf2_gpu_dock_mlflow](https://github.com/aganse/py_tf2_gpu_dock_mlflow): 15 | > ready-to-run Python/Tensorflow2/MLflow setup to train models on GPU 16 | > 17 | > * [aganse/vim-mlflow](https://github.com/aganse/vim-mlflow): 18 | > a Vim plugin to browse MLflow parameters and metrics instead of the GUI 19 | > 20 |
21 | 22 | 23 | ## Summary 24 | The main use-case options available in this MLflow implementation are: 25 | * store the core MLflow info in a new separate standalone database instance, or 26 | in a pre-existing database instance elsewhere (including perhaps AWS RDS). 27 | Note a PostgreSQL database is assumed in this repo's setup, although altering 28 | it to use some other database would be a minimal change (mainly in the password-file 29 | handling), 30 | * store the run artifact files (like model and graphic/plot files) in the 31 | local filesystem, in a docker volume, or in an S3 bucket, 32 | * the default setup in this repo serves MLflow with its own database instance, 33 | with both database data and artifact files stored in their own docker volumes. 34 | 35 | There are several docker-compose.yaml files in the compose_variations 36 | subdirectory, any of which can be used in lieu of the docker-compose.yaml in the 37 | root directory to get the desired variation. 38 | 39 | In all variations, the additional nginx reverse proxy on the front end allows 40 | for options such as: 41 | * using an htpasswd file in the nginx container to provide non-secure, basic 42 | logins for workgroup members behind an already-secure firewall, 43 | * implementing more full-fledged certificate-based secure access, 44 | * easily swapping out the nginx image for some other comparable service 45 | (Caddy, for example). 46 | No secure access is implemented here, as that's deemed outside the scope of this 47 | repo, but with the reverse proxy already in place and correctly functioning, 48 | one may focus any security updates on just the reverse proxy component. 49 | 50 | ## To run and connect to MLflow 51 | 52 | An easy way to start the containers using a separate new standalone db instance 53 | is to just let MLflow use the admin user account to access the database. 54 | (Not recommended for a database other than the standalone one, and be judicious 55 | about even that.) 56 | ```bash 57 | echo -n mydbadminpassword > ~/.pgadminpw # used when creating standalone db 58 | echo db:5432:mlflow:postgres:mydbadminpassword > ~/.pgpass # used by mlflow to save/get its results 59 | chmod 600 ~/.pg* 60 | make start 61 | ``` 62 | The first run will be slower as it must download/build the containers, 63 | but after that first time it will start back up the existing containers and 64 | volumes. We can verify it's all up and ready via: 65 | ```bash 66 | > docker ps 67 | CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 68 | dc99e6fc8d80 mlflow_nginx "nginx -g 'daemon of…" 18 minutes ago Up 18 minutes 0.0.0.0:5000->80/tcp, :::5000->80/tcp mlflow_nginx 69 | 259ea89f1a9a mlflow_server "sh -c 'mlflow serve…" 19 minutes ago Up 18 minutes 5001/tcp mlflow_server 70 | 07bbead3e910 postgres:latest "docker-entrypoint.s…" 19 minutes ago Up 19 minutes 5432/tcp mlflow_db 71 | ``` 72 | 73 | When it's up we can access the MLflow website at `http://localhost:5000`. If 74 | this is running on a remote machine without firewalled access, you could access 75 | it via `http://remotehost:5000` (i.e., if the remote hostname were 'remotehost'), or 76 | if remotehost is only accessible via ssh tunnel, then this command running in a 77 | separate terminal: 78 | ```bash 79 | ssh -CNL 5000:localhost:5000 <username>@<remotehost> 80 | ``` 81 | will allow you to access the MLflow website via `http://localhost:5000` locally.
82 | If running on AWS, that ssh line might look something like: 83 | ```bash 84 | ssh -CNi "~/.ssh/my_awskey.pem" -L 5000:localhost:5000 ec2-user@12.34.56.78 85 | ``` 86 | 87 | You can shut it all down via `make stop`, which just runs a 88 | `docker compose down` command. 89 | 90 | A set of environment variables controls the behavior of the 91 | implementation, but depending on one's needs one may get away with not 92 | specifying any of them and simply using the defaults. The password 93 | for the database is supplied securely via a ~/.pgpass file, PostgreSQL's standard 94 | password-handling mechanism. 95 | 96 | Here are the possible env vars one may set, and the defaults that will be 97 | used if a variable is not explicitly set. For runs in the default setup you 98 | can start it up without setting any of these. 99 | ```bash 100 | # only bother with the ones you want to change from defaults 101 | export DB_NAME=mlflow 102 | export DB_USER=postgres # default is admin user of standalone database, but 103 | # in pre-existing database would use regular user account 104 | export DB_SERVER=db # 'db' is the name of the default standalone database 105 | # container, but DB_SERVER could be set to something like 106 | # mydatabaseserver.abcdefghij.us-west-2.rds.amazonaws.com 107 | export DB_PORT=5432 # port of database process 108 | export PGADMINPW=~/.pgadminpw # file containing pw to use for admin user of new standalone db (if used) 109 | export PGPASS=~/.pgpass # file containing pw to use for mlflow (DB_USER) account, in PostgreSQL pgpass format 110 | export FILESTORE=/storage/mlruns # if using filesystem for artifacts; unused if using S3 111 | export AWS_DEFAULT_REGION=us-west-2 # unused unless using S3 112 | export AWS_S3BUCKETURL=s3://mybucketname/myprefix/ # unused unless using S3 113 | export AWS_ACCESS_KEY_ID=xxxxxxxxxxxxxxxx # unused unless using S3 114 | export AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxx # unused unless using S3 115 | ``` 116 | 117 | *Warning:* 118 | Regardless of the mechanisms noted above, it's important to note that the 119 | open-source version of MLflow is still fundamentally insecure, with no user logins. 120 | One should run this strictly on a secure, company-internal, firewalled intranet 121 | and/or wrapped within some secure/https, internet-facing layer. 122 | Overall the typical use-case here is individual or small-group usage contained 123 | inside a company's internal network behind a firewall, so this is not at the top of my 124 | concern list. Please beware of use-cases beyond that. 125 | 126 | 127 | ## A few other functionalities to note 128 | 129 | The Makefile contains the following two targets, which can be useful in testing 130 | and development: 131 | 132 | * `make mlflowquickcheck` just outputs the MLflow experiments list as a 133 | connectivity test, answering the basic question of "is it working?" 134 | 135 | * `make mlflowpopulate` runs the small, quick-running example project 136 | 'mlflow-example' to generate some example/test contents in your MLflow 137 | instance. This test content in a rapidly-spun-up MLflow instance can be 138 | really useful when testing other tools such as the 139 | [vim-mlflow](https://github.com/aganse/vim-mlflow) Vim plugin.
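Once the server is up, logging runs to it from your own code is just a matter of
pointing the MLflow client at the tracking server. A minimal sketch in Python
(the experiment name and the logged parameter/metric values here are arbitrary
placeholders):
```python
import mlflow

# Point the client at the nginx-fronted tracking server:
mlflow.set_tracking_uri("http://localhost:5000")

# Create (or reuse) an experiment and log a quick test run:
mlflow.set_experiment("Testing1")
with mlflow.start_run():
    mlflow.log_param("alpha", 0.02)
    mlflow.log_metric("rmse", 0.78)
```
Setting the MLFLOW_TRACKING_URI environment variable, as the Makefile targets
above do, works too in place of the `mlflow.set_tracking_uri()` call.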
140 | 141 | 142 | ## Relevant links 143 | 144 | The initial implementation was based on 145 | [Guillaume Androz's 10-Jan-2020 Towards-Data-Science post, "Deploy MLflow with docker compose"](https://towardsdatascience.com/deploy-mlflow-with-docker-compose-8059f16b6039) (thanks for getting me started!) 146 | 147 | Other links:
148 | https://github.com/ymym3412/mlflow-docker-compose 149 | https://medium.com/vantageai/keeping-your-ml-model-in-shape-with-kafka-airflow-and-mlflow-143d20024ba6 150 | https://docs.nginx.com/nginx/admin-guide/security-controls/configuring-http-basic-authentication/ 151 | https://www.digitalocean.com/community/tutorials/how-to-set-up-password-authentication-with-nginx-on-ubuntu-14-04 152 | https://www.digitalocean.com/community/tutorials/how-to-set-up-http-authentication-with-nginx-on-ubuntu-12-10 153 | -------------------------------------------------------------------------------- /compose_variations/README.md: -------------------------------------------------------------------------------- 1 | The goal here is for these files to serve as ready-to-go variations for 2 | different DBs and configs. But FYI, in practice I've only really focused on 3 | the PostgreSQL-based scripts, so the sqlite- and mysql-based scripts may be out 4 | of date or may need a tweak to work; additionally I've mainly focused on 5 | the docker-compose.yaml in the root dir, so there may be a few other differences. 6 | 7 | 8 | | file | description | 9 | |------------------------------------|-------------| 10 | |docker-compose.mlflow_existingpostgres.yaml|connect mlflow to a pre-existing postgres backend database (repo default)| 11 | |docker-compose.mlflow_newpostgres.yaml|stand up a new postgres db and connect mlflow to it for backend| 12 | |docker-compose.mlflow_postgres.yaml |stand up a new postgres db with both db and mlflow ports exposed| 13 | |docker-compose.mlflow_postgres_nginx.yaml|stand up a new postgres db with mlflow served behind an nginx reverse proxy| 14 | |docker-compose.mlflow_sqlite.yaml |stand up a new sqlite db and connect mlflow to it for backend| 15 | |docker-compose.mlflow_mysql.yaml |stand up a new mysql db and connect mlflow to it for backend| 16 | |docker-compose.orig.yaml |original scripts from [Guillaume Androz's 10-Jan-2020 Towards-Data-Science article](https://towardsdatascience.com/deploy-mlflow-with-docker-compose-8059f16b6039) per README.md| 17 | 18 | 19 | To use one of these, just copy it to ../docker-compose.yaml. 20 | Note the repo's default docker-compose.yaml in its root directory is 21 | docker-compose.mlflow_existingpostgres.yaml to begin with. 22 | -------------------------------------------------------------------------------- /compose_variations/docker-compose.mlflow_existingpostgres.yaml: -------------------------------------------------------------------------------- 1 | # This version hides the underlying postgres database on a backend network 2 | # different from the frontend network on which nginx interfaces mlflow. 3 | # And mlflow is hidden behind nginx, allowing user auth to be implemented. 4 | # 5 | # The following environment vars can be set in the shell before running 6 | # docker-compose (default values are these): 7 | # export DB_SERVER=db # defaults to db container; can replace with ip address 8 | # export DB_NAME=mlflow 9 | # export DB_USER=postgres 10 | # export PGPASS=~/.pgpass # path of .pgpass file containing db pw 11 | # export AWSCREDS=~/.aws # path of AWS credentials files 12 | # export DB_PORT=5432 13 | # export FILESTORE=mlruns_vol # defaults to docker vol; can replace with dir 14 | # export MLFLOW_PORT=5000 15 | # export AWS_S3BUCKETURL=s3://mybucket/myprefix/ # no quotes 16 | # export AWS_REGION=us-west-2 # no quotes 17 | # Those defaults are set automatically - you only need to specify the ones you 18 | # want to change. 19 | # 20 | # Note artifacts can be stored in an AWS S3 bucket, in a docker volume called 21 | # mlruns_vol, or on a local drive, via choice of the ARTIFACTS_ROOT variable.
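# For reference, the ~/.pgpass file mapped into the container below follows
# PostgreSQL's standard hostname:port:database:username:password format; the
# main README creates it (with example values) via:
#   echo db:5432:mlflow:postgres:mydbadminpassword > ~/.pgpass
#   chmod 600 ~/.pgpass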
22 | 23 | version: '3.3' 24 | 25 | services: 26 | app: 27 | restart: always 28 | build: ./mlflow 29 | image: mlflow_server 30 | container_name: mlflow_server 31 | expose: 32 | - 5001 33 | environment: 34 | - BACKEND=postgresql://${DB_USER:-mlflow}@${DB_SERVER:-db}:${DB_PORT:-5432}/${DB_NAME:-mlflow} 35 | # - ARTIFACTS_ROOT=/mlruns # for artifact store in locally mapped volume (below) 36 | - ARTIFACTS_ROOT=mlflow-artifacts:/ # for artifact store in AWS S3 (or delete this var) 37 | - ARTIFACTS_DEST=${AWS_S3BUCKETURL} 38 | - AWS_DEFAULT_REGION=${AWS_REGION} 39 | # If using AWS the credentials file(s) must be in the .aws directory mapped below. 40 | volumes: 41 | - ${FILESTORE:-mlruns_vol}:/mlruns # ignored if using S3 42 | - ${PGPASS:-~/.pgpass}:/root/.pgpass # provides pw for mlflow database, outside of container 43 | - ${AWSCREDS:-~/.aws}:/root/.aws # provides AWS creds, outside of container 44 | - condaenv_vol:/opt/conda # provides continuity/speed when looping runs with same container 45 | command: 46 | - sh # (sh form here allows for var substitution of BACKEND and ARTIFACTS vars) 47 | - -c 48 | - mlflow server 49 | --port 5001 50 | --host 0.0.0.0 51 | --backend-store-uri $${BACKEND} 52 | --default-artifact-root $${ARTIFACTS_ROOT} 53 | --artifacts-destination $${ARTIFACTS_DEST} 54 | --serve-artifacts 55 | 56 | nginx: 57 | restart: always 58 | build: ./nginx 59 | image: mlflow_nginx 60 | container_name: mlflow_nginx 61 | ports: 62 | - "${MLFLOW_PORT:-5000}:80" 63 | depends_on: 64 | - app 65 | 66 | volumes: 67 | mlruns_vol: 68 | condaenv_vol: 69 | -------------------------------------------------------------------------------- /compose_variations/docker-compose.mlflow_mysql.yaml: -------------------------------------------------------------------------------- 1 | # This version exposes the underlying mysql database as well as the mlflow 2 | # server, so we can access the database contents directly. 3 | # 4 | # Have the following environment vars set in shell before running docker-compose 5 | # (suggested values here but can use whatever desired): 6 | # export DB_NAME=mlflowdb 7 | # export DB_USER=mluser 8 | # export DB_PW= 9 | # export DB_ROOTPW= 10 | # export DB_PORT=3306 11 | # export MLFLOW_PORT=5001 12 | # 13 | # AWS S3 bucket can be used instead of local drive for artifacts store via 14 | # commented-out environment lines below. 
15 | 16 | version: '3.3' 17 | 18 | services: 19 | db: 20 | restart: always 21 | image: mysql/mysql-server:5.7.28 22 | container_name: mlflow_db 23 | networks: 24 | - mydefault 25 | expose: 26 | - ${DB_PORT} 27 | ports: 28 | - "${DB_PORT}:${DB_PORT}" 29 | environment: 30 | - MYSQL_DATABASE=${DB_NAME} 31 | - MYSQL_USER=${DB_USER} 32 | - MYSQL_PASSWORD=${DB_PW} 33 | - MYSQL_ROOT_PASSWORD=${DB_ROOTPW} 34 | volumes: 35 | - db_datams:/var/lib/mysql 36 | 37 | app: 38 | restart: always 39 | build: ./mlflow 40 | image: mlflow_server 41 | container_name: mlflow_server 42 | networks: 43 | - mydefault 44 | expose: 45 | - ${MLFLOW_PORT} 46 | ports: 47 | - "${MLFLOW_PORT}:${MLFLOW_PORT}" 48 | environment: 49 | - BACKEND=mysql+pymysql://${DB_USER}:${DB_PW}@db:${DB_PORT}/${DB_NAME} 50 | - ARTIFACTS=/mlruns 51 | # For artifact store in AWS S3 (note boto was installed in container): 52 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 53 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 54 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} 55 | # - ARTIFACTS="s3://mlflow_bucket/mlflow/" 56 | volumes: 57 | - mlrun_data:/mlruns 58 | command: 59 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS) 60 | - -c 61 | - mlflow server 62 | --port ${MLFLOW_PORT} 63 | --host 0.0.0.0 64 | --backend-store-uri $${BACKEND} 65 | --default-artifact-root $${ARTIFACTS} 66 | depends_on: 67 | - db 68 | 69 | # nginx: 70 | # restart: always 71 | # build: ./nginx 72 | # image: mlflow_nginx 73 | # container_name: mlflow_nginx 74 | # ports: 75 | # - "80:80" 76 | # networks: 77 | # - frontend 78 | # depends_on: 79 | # - web 80 | 81 | networks: 82 | mydefault: 83 | driver: bridge 84 | 85 | volumes: 86 | db_datams: 87 | mlrun_data: 88 | -------------------------------------------------------------------------------- /compose_variations/docker-compose.mlflow_newpostgres.yaml: -------------------------------------------------------------------------------- 1 | # This version hides the underlying postgres database on a backend network 2 | # different from the frontend network on which nginx interfaces mlflow. 3 | # And mlflow is hidden behind nginx, allowing user auth to be implemented. 4 | # 5 | # The following environment vars can be set in the shell before running 6 | # docker-compose (default values are these): 7 | # export DB_NAME=mlflow 8 | # export DB_USER=postgres 9 | # export DB_SERVER=db # i.e. defaults to db container; can replace with ip address 10 | # export PGPASS=~/.pgpass # path of .pgpass file (contains pw) 11 | # export DB_PORT=5432 12 | # export FILESTORE=mlruns_vol # i.e. defaults to docker volume; can replace with filesys dir 13 | # export MLFLOW_PORT=5000 14 | # Those defaults are set automatically so you only need to specify the ones 15 | # you want to change - all of them are optional. 16 | # 17 | # Note an AWS S3 bucket can be used instead of local drive for the artifacts 18 | # store, via the commented-out environment lines below.
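# The pg_admin_pw secret defined at the bottom of this file reads the new
# database's admin password from ~/.pgadminpw; the main README creates that
# file (with an example value) via:
#   echo -n mydbadminpassword > ~/.pgadminpw
#   chmod 600 ~/.pgadminpw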
19 | 20 | version: '3.3' 21 | 22 | services: 23 | # If using external DB_SERVER, comment out this db container 24 | db: 25 | restart: always 26 | image: postgres:13 27 | container_name: mlflow_db 28 | expose: 29 | - ${DB_PORT:-5432} 30 | # networks: 31 | # - backend 32 | environment: 33 | # - MUID=$UID 34 | # - MGID=$GID 35 | - POSTGRES_DB=${DB_NAME:-mlflow} 36 | - POSTGRES_USER=${DB_USER:-postgres} 37 | - POSTGRES_PASSWORD_FILE=/run/secrets/pg_admin_pw 38 | secrets: 39 | - pg_admin_pw 40 | volumes: 41 | - datapg_vol:/var/lib/postgresql/data 42 | 43 | app: 44 | restart: always 45 | build: ./mlflow 46 | image: mlflow_server 47 | container_name: mlflow_server 48 | expose: 49 | - 5001 50 | # networks: 51 | # - frontend 52 | # - backend 53 | environment: 54 | - BACKEND=postgresql://${DB_USER:-postgres}@${DB_SERVER:-db}:${DB_PORT:-5432}/${DB_NAME:-mlflow} 55 | - ARTIFACTS=/mlruns # in-container path to filestore in filesys 56 | # For artifact store in AWS S3 (uses boto that was installed in container): 57 | # Comment out ARTIFACTS line above and instead use: 58 | # - ARTIFACTS="s3://mlflow_bucket/my_mlflow_dir/" 59 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 60 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 61 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} 62 | volumes: 63 | - ${FILESTORE:-mlruns_vol}:/mlruns # can comment out this line if using S3 64 | - ${PGPASS:-~/.pgpass}:/root/.pgpass # provides the pw for BACKEND database 65 | - condaenv_vol:/opt/conda # provides continuity/speed when looping runs with same container 66 | command: 67 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS) 68 | - -c 69 | - mlflow server 70 | --port 5001 71 | --host 0.0.0.0 72 | --backend-store-uri $${BACKEND} 73 | --default-artifact-root $${ARTIFACTS} 74 | # depends_on: 75 | # - db 76 | 77 | nginx: 78 | restart: always 79 | build: ./nginx 80 | image: mlflow_nginx 81 | container_name: mlflow_nginx 82 | ports: 83 | - "${MLFLOW_PORT:-5000}:80" 84 | # networks: 85 | # - frontend 86 | depends_on: 87 | - app 88 | 89 | # networks: 90 | # frontend: 91 | # driver: bridge 92 | # backend: 93 | # driver: bridge 94 | 95 | secrets: 96 | pg_admin_pw: 97 | file: ~/.pgadminpw 98 | 99 | volumes: 100 | mlruns_vol: 101 | datapg_vol: 102 | condaenv_vol: 103 | -------------------------------------------------------------------------------- /compose_variations/docker-compose.mlflow_postgres.yaml: -------------------------------------------------------------------------------- 1 | # This version exposes the underlying postgres database as well as the mlflow 2 | # server, so we can access the database contents directly. 3 | # 4 | # Have the following environment vars set in shell before running docker-compose 5 | # (suggested values here but can use whatever desired): 6 | # export DB_NAME=mlflowdb 7 | # export DB_USER=postgres 8 | # export DB_PW= 9 | # export DB_PORT=5432 10 | # export MLFLOW_PORT=5001 11 | # 12 | # AWS S3 bucket can be used instead of local drive for artifacts store via 13 | # commented-out environment lines below.
14 | 15 | version: '3.3' 16 | 17 | services: 18 | db: 19 | restart: always 20 | image: postgres:latest 21 | container_name: mlflow_db 22 | expose: 23 | - ${DB_PORT} 24 | networks: 25 | - mydefault 26 | ports: 27 | - "${DB_PORT}:${DB_PORT}" 28 | environment: 29 | - POSTGRES_DB=${DB_NAME} 30 | - POSTGRES_USER=${DB_USER} 31 | - POSTGRES_PASSWORD=${DB_PW} 32 | volumes: 33 | - db_datapg:/var/lib/postgresql/data 34 | 35 | app: 36 | restart: always 37 | build: ./mlflow 38 | image: mlflow_server 39 | container_name: mlflow_server 40 | networks: 41 | - mydefault 42 | expose: 43 | - ${MLFLOW_PORT} 44 | ports: 45 | - "${MLFLOW_PORT}:${MLFLOW_PORT}" 46 | environment: 47 | - BACKEND=postgresql://${DB_USER}:${DB_PW}@db:${DB_PORT}/${DB_NAME} 48 | - ARTIFACTS=/mlruns 49 | # For artifact store in AWS S3 (note boto was installed in container): 50 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 51 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 52 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} 53 | # - ARTIFACTS="s3://mlflow_bucket/mlflow/" 54 | volumes: 55 | - mlrun_data:/mlruns 56 | 57 | command: 58 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS) 59 | - -c 60 | - mlflow server 61 | --port ${MLFLOW_PORT} 62 | --host 0.0.0.0 63 | --backend-store-uri $${BACKEND} 64 | --default-artifact-root $${ARTIFACTS} 65 | depends_on: 66 | - db 67 | 68 | 69 | 70 | networks: 71 | mydefault: 72 | driver: bridge 73 | 74 | volumes: 75 | db_datapg: 76 | mlrun_data: 77 | -------------------------------------------------------------------------------- /compose_variations/docker-compose.mlflow_postgres_nginx.yaml: -------------------------------------------------------------------------------- 1 | # This version hides the underlying postgres database on a backend network 2 | # different from the frontend network on which nginx interfaces mlflow. 3 | # And mlflow is hidden behind nginx, allowing user auth to be implemented. 4 | # 5 | # Have the following environment vars set in shell before running docker-compose 6 | # (suggested values here but can use whatever desired): 7 | # export DB_NAME=mlflowdb 8 | # export DB_USER=postgres 9 | # export DB_PW= 10 | # export DB_PORT=5432 11 | # export MLFLOW_PORT=5000 12 | # 13 | # AWS S3 bucket can be used instead of local drive for artifacts store via 14 | # commented-out environment lines below. 
15 | 16 | version: '3.3' 17 | 18 | services: 19 | db: 20 | restart: always 21 | image: postgres:latest 22 | container_name: mlflow_db 23 | expose: 24 | - ${DB_PORT} 25 | networks: 26 | - backend 27 | environment: 28 | - MUID=$UID 29 | - MGID=$GID 30 | - POSTGRES_DB=${DB_NAME} 31 | - POSTGRES_USER=${DB_USER} 32 | - POSTGRES_PASSWORD=${DB_PW} 33 | volumes: 34 | - db_datapg:/var/lib/postgresql/data 35 | 36 | app: 37 | restart: always 38 | build: ./mlflow 39 | image: mlflow_server 40 | container_name: mlflow_server 41 | expose: 42 | - 5001 43 | networks: 44 | - frontend 45 | - backend 46 | environment: 47 | - BACKEND=postgresql://${DB_USER}:${DB_PW}@db:${DB_PORT}/${DB_NAME} 48 | - ARTIFACTS=/mlruns 49 | # For artifact store in AWS S3 (note boto was installed in container): 50 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 51 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 52 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} 53 | # - ARTIFACTS="s3://mlflow_bucket/mlflow/" 54 | volumes: 55 | - mlrun_data:/mlruns 56 | 57 | command: 58 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS) 59 | - -c 60 | - mlflow server 61 | --port 5001 62 | --host 0.0.0.0 63 | --backend-store-uri $${BACKEND} 64 | --default-artifact-root $${ARTIFACTS} 65 | depends_on: 66 | - db 67 | 68 | nginx: 69 | restart: always 70 | build: ./nginx 71 | image: mlflow_nginx 72 | container_name: mlflow_nginx 73 | ports: 74 | - "${MLFLOW_PORT}:80" 75 | networks: 76 | - frontend 77 | depends_on: 78 | - app 79 | 80 | networks: 81 | frontend: 82 | driver: bridge 83 | backend: 84 | driver: bridge 85 | 86 | volumes: 87 | db_datapg: 88 | mlrun_data: 89 | -------------------------------------------------------------------------------- /compose_variations/docker-compose.mlflow_sqlite.yaml: -------------------------------------------------------------------------------- 1 | # Have the following environment vars set in shell before running docker-compose 2 | # (suggested values here but can use whatever desired): 3 | # export DB_NAME=mlflowdb 4 | # export MLFLOW_PORT=5001 5 | 6 | version: '3.3' 7 | 8 | services: 9 | app: 10 | restart: always 11 | build: ./mlflow 12 | image: mlflow_server 13 | container_name: mlflow_server 14 | # networks: 15 | # - frontend 16 | expose: 17 | - ${MLFLOW_PORT} 18 | ports: 19 | - "${MLFLOW_PORT}:${MLFLOW_PORT}" 20 | environment: 21 | - BACKEND=sqlite:///db/${DB_NAME}.db 22 | - ARTIFACTS=/mlruns 23 | # For artifact store in AWS S3 (note boto was installed in container): 24 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 25 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 26 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} 27 | # - ARTIFACTS="s3://mlflow_bucket/mlflow/" 28 | volumes: 29 | - mlrun_data:/mlruns 30 | - sqlitedb_data:/db 31 | 32 | command: 33 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS) 34 | - -c 35 | - mlflow server 36 | --port ${MLFLOW_PORT} 37 | --host 0.0.0.0 38 | --backend-store-uri $${BACKEND} 39 | --default-artifact-root $${ARTIFACTS} 40 | 41 | # still useful if we tweak to handle htpasswd basic authentication to gate users 42 | # nginx: 43 | # restart: always 44 | # build: ./nginx 45 | # image: mlflow_nginx 46 | # container_name: mlflow_nginx 47 | # ports: 48 | # - "80:80" 49 | # networks: 50 | # - frontend 51 | # - backend 52 | # depends_on: 53 | # - app 54 | 55 | # networks: 56 | # frontend: 57 | # driver: bridge 58 | # backend: 59 | # driver: bridge 60 | 61 | volumes: 62 | mlrun_data: 63 | sqlitedb_data: 64 | 
-------------------------------------------------------------------------------- /compose_variations/docker-compose.orig.yaml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | services: 4 | db: 5 | restart: always 6 | image: mysql/mysql-server:5.7.28 7 | container_name: mlflow_db 8 | expose: 9 | - "3306" 10 | networks: 11 | - backend 12 | environment: 13 | - MYSQL_DATABASE=${MYSQL_DATABASE} 14 | - MYSQL_USER=${MYSQL_USER} 15 | - MYSQL_PASSWORD=${MYSQL_PASSWORD} 16 | - MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD} 17 | volumes: 18 | - dbdata:/var/lib/mysql 19 | 20 | web: 21 | restart: always 22 | build: ./mlflow 23 | image: mlflow_server 24 | container_name: mlflow_server 25 | expose: 26 | - "5000" 27 | networks: 28 | - frontend 29 | - backend 30 | environment: 31 | - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 32 | - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 33 | - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} 34 | command: mlflow server --backend-store-uri mysql+pymysql://${MYSQL_USER}:${MYSQL_PASSWORD}@db:3306/${MYSQL_DATABASE} --default-artifact-root s3://mlflow_bucket/mlflow/ --host 0.0.0.0 35 | 36 | nginx: 37 | restart: always 38 | build: ./nginx 39 | image: mlflow_nginx 40 | container_name: mlflow_nginx 41 | ports: 42 | - "80:80" 43 | networks: 44 | - frontend 45 | depends_on: 46 | - web 47 | 48 | networks: 49 | frontend: 50 | driver: bridge 51 | backend: 52 | driver: bridge 53 | 54 | volumes: 55 | dbdata: 56 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | # This version hides the underlying postgres database on a backend network 2 | # different from the frontend network on which nginx interfaces mlflow. 3 | # And mlflow is hidden behind nginx, allowing user auth to be implemented. 4 | # 5 | # The following environment vars can be set in the shell before running 6 | # docker-compose (default values are these): 7 | # export DB_NAME=mlflow 8 | # export DB_USER=postgres 9 | # export DB_SERVER=db # i.e. defaults to db container; can replace with ip address 10 | # export PGPASS=~/.pgpass # path of .pgpass file (contains pw) 11 | # export DB_PORT=5432 12 | # export FILESTORE=/storage/mlruns # i.e. defaults to this filesystem dir; can replace with a docker volume 13 | # export MLFLOW_PORT=5000 14 | # Those defaults are set automatically so you only need to specify the ones 15 | # you want to change - all of them are optional. 16 | # 17 | # Note an AWS S3 bucket can be used instead of local drive for the artifacts 18 | # store, via the commented-out environment lines below.
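# For example, to point mlflow at a pre-existing RDS instance instead of the
# bundled db container, one might set (hypothetical values):
#   export DB_SERVER=mydatabaseserver.abcdefghij.us-west-2.rds.amazonaws.com
#   export DB_USER=mlflowuser
# (and comment out the db service below) before running
# 'docker compose up -d --build', or equivalently 'make start'.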
19 | 20 | version: '3.3' 21 | 22 | services: 23 | db: 24 | restart: always 25 | image: postgres:13 26 | container_name: mlflow_db 27 | expose: 28 | - ${DB_PORT:-5432} 29 | # networks: 30 | # - backend 31 | environment: 32 | # - MUID=$UID 33 | # - MGID=$GID 34 | - POSTGRES_DB=${DB_NAME:-mlflow} 35 | - POSTGRES_USER=${DB_USER:-postgres} 36 | - POSTGRES_PASSWORD_FILE=/run/secrets/pg_admin_pw 37 | secrets: 38 | - pg_admin_pw 39 | volumes: 40 | - datapg_vol:/var/lib/postgresql/data 41 | 42 | app: 43 | restart: always 44 | build: ./mlflow 45 | image: mlflow_server 46 | container_name: mlflow_server 47 | expose: 48 | - 5001 49 | # networks: 50 | # - frontend 51 | # - backend 52 | environment: 53 | - BACKEND=postgresql://${DB_USER:-postgres}@${DB_SERVER:-db}:${DB_PORT:-5432}/${DB_NAME:-mlflow} 54 | - ARTIFACTS=/storage/mlruns # in-container path to filestore in filesys 55 | # For artifact store in AWS S3 (uses boto that was installed in container): 56 | # Comment out ARTIFACTS line above and instead use: 57 | # - ARTIFACTS="s3://mlflow_bucket/my_mlflow_dir/" 58 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 59 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 60 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} 61 | volumes: 62 | - ${FILESTORE:-/storage/mlruns}:/storage/mlruns # can comment out this line if using S3 63 | - ${PGPASS:-~/.pgpass}:/root/.pgpass # provides the pw for BACKEND database 64 | - condaenv_vol:/opt/conda # provides continuity/speed when looping runs with same container 65 | command: 66 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS) 67 | - -c 68 | - mlflow server 69 | --port 5001 70 | --host 0.0.0.0 71 | --backend-store-uri $${BACKEND} 72 | --default-artifact-root $${ARTIFACTS} 73 | # depends_on: 74 | # - db 75 | 76 | nginx: 77 | restart: always 78 | build: ./nginx 79 | image: mlflow_nginx 80 | container_name: mlflow_nginx 81 | ports: 82 | - "${MLFLOW_PORT:-5000}:80" 83 | # networks: 84 | # - frontend 85 | depends_on: 86 | - app 87 | 88 | # networks: 89 | # frontend: 90 | # driver: bridge 91 | # backend: 92 | # driver: bridge 93 | 94 | secrets: 95 | pg_admin_pw: 96 | file: ~/.pgadminpw 97 | 98 | volumes: 99 | mlruns_vol: 100 | datapg_vol: 101 | condaenv_vol: 102 | -------------------------------------------------------------------------------- /mlflow/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | RUN apt-get update && apt-get install -y git 3 | 4 | RUN conda install -c conda-forge shap # note shap's binary pkg not available in pip 5 | RUN pip install mlflow psycopg2-binary pymysql boto3 6 | 7 | RUN cd /home && git clone https://github.com/mlflow/mlflow-example.git 8 | -------------------------------------------------------------------------------- /nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:1.17.6 2 | # Remove default Nginx config 3 | RUN rm /etc/nginx/nginx.conf 4 | # Copy the modified Nginx conf 5 | COPY nginx.conf /etc/nginx 6 | # Copy proxy config 7 | COPY mlflow.conf /etc/nginx/sites-enabled/ 8 | -------------------------------------------------------------------------------- /nginx/mlflow.conf: -------------------------------------------------------------------------------- 1 | # Define the parameters for a specific virtual host/server 2 | server { 3 | # Define the server name, IP address, and/or port of the server 4 | listen 80; 5 | 6 | # Define the specified charset to the “Content-Type” response header field
7 | charset utf-8; 8 | 9 | # Configure NGINX to reverse proxy HTTP requests to the upstream server (the mlflow server) 10 | location / { 11 | # Define the location of the proxy server to send the request to 12 | proxy_pass http://app:5001; 13 | 14 | # Redefine the header fields that NGINX sends to the upstream server 15 | proxy_set_header Host $host; 16 | proxy_set_header X-Real-IP $remote_addr; 17 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | # Define the user that will own and run the Nginx server 2 | user nginx; 3 | # Define the number of worker processes; recommended value is the number of 4 | # cores that are being used by your server 5 | worker_processes 1; 6 | 7 | # Define the location on the file system of the error log, plus the minimum 8 | # severity to log messages for 9 | error_log /var/log/nginx/error.log warn; 10 | # Define the file that will store the process ID of the main NGINX process 11 | pid /var/run/nginx.pid; 12 | 13 | 14 | # events block defines the parameters that affect connection processing. 15 | events { 16 | # Define the maximum number of simultaneous connections that can be opened by a worker process 17 | worker_connections 1024; 18 | } 19 | 20 | 21 | # http block defines the parameters for how NGINX should handle HTTP web traffic 22 | http { 23 | # Include the file defining the list of file types that are supported by NGINX 24 | include /etc/nginx/mime.types; 25 | # Define the default file type that is returned to the user 26 | default_type text/html; 27 | 28 | # Define the format of log messages. 29 | log_format main '$remote_addr - $remote_user [$time_local] "$request" ' 30 | '$status $body_bytes_sent "$http_referer" ' 31 | '"$http_user_agent" "$http_x_forwarded_for"'; 32 | 33 | # Define the location of the log of access attempts to NGINX 34 | access_log /var/log/nginx/access.log main; 35 | 36 | # Define the parameters to optimize the delivery of static content 37 | sendfile on; 38 | tcp_nopush on; 39 | tcp_nodelay on; 40 | 41 | # Define the timeout value for keep-alive connections with the client 42 | keepalive_timeout 65; 43 | 44 | # Define the usage of the gzip compression algorithm to reduce the amount of data to transmit 45 | #gzip on; 46 | 47 | # Include additional parameters for virtual host(s)/server(s) 48 | include /etc/nginx/sites-enabled/*.conf; 49 | } 50 | --------------------------------------------------------------------------------