├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── compose_variations
│   ├── README.md
│   ├── docker-compose.mlflow_existingpostgres.yaml
│   ├── docker-compose.mlflow_mysql.yaml
│   ├── docker-compose.mlflow_newpostgres.yaml
│   ├── docker-compose.mlflow_postgres.yaml
│   ├── docker-compose.mlflow_postgres_nginx.yaml
│   ├── docker-compose.mlflow_sqlite.yaml
│   └── docker-compose.orig.yaml
├── docker-compose.yaml
├── mlflow
│   └── Dockerfile
└── nginx
    ├── Dockerfile
    ├── mlflow.conf
    └── nginx.conf
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
92 | # install all needed dependencies.
93 | #Pipfile.lock
94 |
95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96 | __pypackages__/
97 |
98 | # Celery stuff
99 | celerybeat-schedule
100 | celerybeat.pid
101 |
102 | # SageMath parsed files
103 | *.sage.py
104 |
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 |
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 |
118 | # Rope project settings
119 | .ropeproject
120 |
121 | # mkdocs documentation
122 | /site
123 |
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 |
129 | # Pyre type checker
130 | .pyre/
131 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright © 2023 A. Ganse
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for testing and populating of the docker/mlflow/db setup.
2 | # Warning: note 'make clean' will empty out your mlflow contents completely.
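#
# Example usage (a typical workflow with the targets defined below):
#   make start             # build images and launch the stack
#   make mlflowquickcheck  # verify the server responds
#   make mlflowpopulate    # generate example runs (uses EXPT, ALPHA, L1RATIO below)
#   make stop              # shut it all down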
3 |
4 | # MLGWHOST is the gateway address of the compose network, used for reaching
5 | # the host-published mlflow port from inside the mlflow_server container.
6 | MLGWHOST=$(shell docker inspect -f '{{ .NetworkSettings.Networks.docker_mlflow_db_default.Gateway }}' mlflow_server)
7 | # MLGWHOST=172.17.0.1 # may be ubuntu-specific
8 | # From outside the containers, mlflow is accessed via localhost instead.
9 | MLFLOW_PORT=5000
10 |
11 | ALPHA = 0.0002 0.002 0.02 0.2 2.0 20.0 200.0 2000.0
12 | L1RATIO = 0.1 0.2 0.3
13 | EXPT = 'Testing1'
14 |
15 |
16 | start:
17 | 	# Artifact-store location defaults to whatever docker-compose.yaml specifies;
18 | 	# export FILESTORE beforehand to override it (e.g. for easy example runs).
19 | docker compose up -d --build
20 |
21 | stop:
22 | docker compose down
23 |
24 | clean:
25 | docker volume rm docker_mlflow_db_datapg_vol
26 | docker volume rm docker_mlflow_db_condaenv_vol
27 | docker volume rm docker_mlflow_db_mlruns_vol
28 |
29 | mlflowquickcheck:
30 | # Simple access check to mlflow server on host port; just lists experiments.
31 | docker exec \
32 | -e MLFLOW_TRACKING_URI=http://${MLGWHOST}:${MLFLOW_PORT} \
33 | mlflow_server \
34 | mlflow experiments search # (in mlflow v1 use 'list', v2 use 'search')
35 |
36 | mlflowpopulate:
37 | # Populates entries in mlflow with the mlflow team's own mlflow-example.
38 | # First time is slow as conda-installs packages to condaenv_vol volume,
39 | # but runs quick after that via reusing same condaenv_vol volume.
40 | docker exec \
41 | -e MLFLOW_TRACKING_URI=http://${MLGWHOST}:${MLFLOW_PORT} \
42 | mlflow_server \
43 | 	mlflow experiments create -n $(EXPT) > /dev/null 2>&1 || \
44 | echo Populating pre-existing experiment $(EXPT)
45 | @$(foreach loop_l1ratio, $(L1RATIO), \
46 | $(foreach loop_alpha, $(ALPHA), \
47 | echo passing params $(loop_l1ratio) $(loop_alpha) into loop; \
48 | docker exec \
49 | -e LD_LIBRARY_PATH=/opt/conda/lib \
50 | -e MLFLOW_TRACKING_URI=http://${MLGWHOST}:${MLFLOW_PORT} \
51 | mlflow_server \
52 | mlflow run /home/mlflow-example \
53 | -P alpha=$(loop_alpha) \
54 | -P l1_ratio=$(loop_l1ratio) \
55 | --experiment-name=$(EXPT) \
56 | ;) \
57 | )
58 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # docker_mlflow_db
2 |
3 | A ready-to-run Docker container setup to quickly provide MLflow as a service, with optional
4 | database backend, optional storage of artifacts in AWS S3, and a reverse proxy
5 | frontend which could allow one to easily implement basic or secure authentication.
6 |
7 | >
8 | > :bulb: Note this repo is part of a trio that you might find useful together
9 | > (but all are separate tools that can be used independently):
10 | >
11 | > * [aganse/docker_mlflow_db](https://github.com/aganse/docker_mlflow_db):
12 | > ready-to-run MLflow server with PostgreSQL, AWS S3, Nginx
13 | >
14 | > * [aganse/py_tf2_gpu_dock_mlflow](https://github.com/aganse/py_tf2_gpu_dock_mlflow):
15 | > ready-to-run Python/Tensorflow2/MLflow setup to train models on GPU
16 | >
17 | > * [aganse/vim-mlflow](https://github.com/aganse/vim-mlflow):
18 | > a Vim plugin to browse the MLflow parameters and metrics instead of GUI
19 | >
20 |
21 |
22 |
23 | ## Summary
24 | The main use-case options available in this MLflow implementation are:
25 | * store the core MLflow info in a new separate standalone database instance, or
26 | in a pre-existing database instance elsewhere (including perhaps AWS RDS).
27 | Note a PostgreSQL database is assumed in this repo's setup, although altering
28 | to some other database would be a minimal change (mainly in the password file
29 | handling)
30 | * store the run artifact files (like model and graphic/plot files) in the
31 | local filesystem, in a docker volume, or in an S3 bucket,
32 | * the default setup in this repo serves MLflow with its own database instance,
33 | with the database data stored in its own docker volume.
34 |
35 | There are several docker-compose.yaml files in the compose_variations
36 | subdirectory, any of which can be used in lieu of the docker-compose.yaml in the
37 | root directory to use the desired variation.
38 |
39 | In all variations, the additional nginx reverse-proxy on the front end allows
40 | for options such as:
41 | * using an htpasswd file in the nginx container to provide non-secure, basic
42 | logins for workgroup members behind an already-secure firewall,
43 | * implementing more full-fledged certificate-based secure access,
44 | * easily swapping out the nginx image for some other comparable service
45 | (Caddy for example).
46 | No secure access is implemented here, as that's deemed outside the scope of
47 | this repo, but with the reverse proxy already in place and working correctly,
48 | one may focus one's update efforts on just the reverse-proxy component.
49 |
50 | ## To run and connect to MLflow
51 |
52 | An easy way to start the containers with the separate new standalone db
53 | instance is to just let MLflow use the admin user account to access the
54 | database. (Not recommended for any database other than that standalone one,
55 | and be judicious even about that.)
56 | ```bash
57 | echo -n mydbadminpassword > ~/.pgadminpw # used when creating standalone db
58 | echo db:5432:mlflow:postgres:mydbadminpassword > ~/.pgpass # used by mlflow to save/get its results
59 | chmod 600 ~/.pg*
60 | make start
61 | ```
62 | The first run will be slower, as it must download and build the container
63 | images, but after that it will just start back up the existing containers and
64 | volumes. We can verify it's all up and ready via:
65 | ```bash
66 | > docker ps
67 | CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
68 | dc99e6fc8d80 mlflow_nginx "nginx -g 'daemon of…" 18 minutes ago Up 18 minutes 0.0.0.0:5000->80/tcp, :::5000->80/tcp mlflow_nginx
69 | 259ea89f1a9a mlflow_server "sh -c 'mlflow serve…" 19 minutes ago Up 18 minutes 5001/tcp mlflow_server
70 | 07bbead3e910 postgres:latest "docker-entrypoint.s…" 19 minutes ago Up 19 minutes 5432/tcp mlflow_db
71 | ```
72 |
73 | When it's up we can access the MLflow website at `http://localhost:5000`. If
74 | this is running on a remote machine whose port is not firewalled off, you could
75 | access it via `http://remotehost:5000` (i.e. if the remote hostname were
76 | 'remotehost'); or if remotehost is only reachable over ssh, then this command
77 | running in a separate terminal:
78 | ```bash
79 | ssh -CNL 5000:localhost:5000 <username>@<remotehost>
80 | ```
81 | will allow you to access the MLflow website via `http://localhost:5000` locally.
82 | If running on AWS, that ssh line might look something like:
83 | ```bash
84 | ssh -CNi "~/.ssh/my_awskey.pem" -L 5000:localhost:5000 ec2-user@12.34.56.78
85 | ```
86 |
87 | You can shut the whole docker-compose setup down via `make stop`, which just
88 | runs a `docker compose down` command.
89 |
90 | A set of environment variables can control the behavior of the
91 | implementation, but depending on one's needs one may get away with not
92 | specifying any of them, simply using the defaults for all of them. The password
93 | for the database is supplied securely via a ~/.pgpass file, PostgreSQL's
94 | standard password-handling mechanism (format shown below).
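
Each line of a `.pgpass` file follows PostgreSQL's standard
`hostname:port:database:username:password` format, e.g. the line created in the
quick-start above:
```bash
db:5432:mlflow:postgres:mydbadminpassword
```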
95 |
96 | Here are the possible env vars one may set, and their defaults which will be
97 | used if the variable is not explicitly set. For runs in the default setup you
98 | can start it up without setting any of these.
99 | ```bash
100 | # only bother with the ones you want to change from defaults
101 | export DB_NAME=mlflow
102 | export DB_USER=postgres # default is admin user of standalone database, but
103 | # in pre-existing database would use regular user account
104 | export DB_SERVER=db # 'db' is the name of the default standalone database
105 | # container, but DB_SERVER could be set to something like
106 | # mydatabaseserver.abcdefghij.us-west-2.rds.amazonaws.com
107 | export DB_PORT=5432 # port of database process
108 | export PGADMINPW=~/.pgadminpw # file containing pw to use for admin user of new standalone db (if used)
109 | export PGPASS=~/.pgpass # file containing pw to use for mlflow (DB_USER) account, in PostgreSQL pgpass format
110 | export FILESTORE=/storage/mlruns # if using filesystem for artifacts; unused if using S3
111 | export AWS_DEFAULT_REGION=us-west-2 # unused unless using S3
112 | export AWS_S3BUCKETURL=s3://mybucketname/myprefix/ # unused unless using S3
113 | export AWS_ACCESS_KEY_ID=xxxxxxxxxxxxxxxx # unused unless using S3
114 | export AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxx # unused unless using S3
115 | ```
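
For example, a hypothetical startup storing artifacts in S3 might look like the
following (the bucket, region, and credentials here are placeholders to
substitute with your own; see also the S3-related comments in whichever compose
file is in use):
```bash
export AWS_S3BUCKETURL=s3://mybucketname/myprefix/
export AWS_DEFAULT_REGION=us-west-2
export AWS_ACCESS_KEY_ID=AKIAXXXXXXXXXXXXXXXX
export AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
make start
```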
116 |
117 | *Warning:*
118 | Regardless of the mechanisms noted above, it's important to note that the
119 | open-source version of MLflow is still fundamentally insecure, with no user logins.
120 | One should run this strictly on a secure, company-internal, firewalled intranet
121 | and/or wrapped within some secure/https, internet-facing layer.
122 | Overall the typical use-case here is individual or small-group usage contained
123 | inside a company's internal network behind a firewall, so this is not at the
124 | top of my concern list. Please beware for use-cases beyond that.
125 |
126 |
127 | ## A few other functionalities to note
128 |
129 | The makefile contains the following two macros which can be useful in testing
130 | and development:
131 |
132 | * `make mlflowquickcheck` just outputs the MLflow experiments list as a
133 | connectivity test, answering the basic question of "is it working?" (an
134 | equivalent host-side check is sketched just after this list).
134 |
135 | * `make mlflowpopulate` runs the small, quick-running example project
136 | 'mlflow-example' to generate some example/test contents in your MLflow
137 | instance. This test content in a rapidly-spun-up mlflow instance can be
138 | really useful when testing other tools such as the
139 | [vim-mlflow](https://github.com/aganse/vim-mlflow) Vim plugin.
140 |
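With MLflow also installed on the host, essentially the same connectivity check
can be run locally (a minimal sketch, assuming the default port mapping so the
server is reachable at localhost:5000):
```bash
export MLFLOW_TRACKING_URI=http://localhost:5000
mlflow experiments search   # on mlflow v1 use 'mlflow experiments list'
```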
141 |
142 | ## Relevant links
143 |
144 | The initial implementation was originally based on
145 | [Guillaume Androz's 10-Jan-2020 Towards-Data-Science post, "Deploy MLflow with docker compose"](https://towardsdatascience.com/deploy-mlflow-with-docker-compose-8059f16b6039) (thanks for getting me started!)
146 |
147 | Other links:
148 | https://github.com/ymym3412/mlflow-docker-compose
149 | https://medium.com/vantageai/keeping-your-ml-model-in-shape-with-kafka-airflow-and-mlflow-143d20024ba6
150 | https://docs.nginx.com/nginx/admin-guide/security-controls/configuring-http-basic-authentication/
151 | https://www.digitalocean.com/community/tutorials/how-to-set-up-password-authentication-with-nginx-on-ubuntu-14-04
152 | https://www.digitalocean.com/community/tutorials/how-to-set-up-http-authentication-with-nginx-on-ubuntu-12-10
153 |
--------------------------------------------------------------------------------
/compose_variations/README.md:
--------------------------------------------------------------------------------
1 | The goal here was to have these files as ready-to-go variations for different
2 | DBs and configs. But FYI, in practice I've only actually focused on the
3 | PostgreSQL-based files, so the sqlite- and mysql-based ones may be out of date
4 | or may need a tweak to work; additionally, I've mainly focused on the
5 | docker-compose.yaml in the root dir, so there may be a few other differences.
6 |
7 |
8 | | file | description |
9 | |------------------------------------|-------------|
10 | |docker-compose.mlflow_existingpostgres.yaml|connect mlflow to a pre-existing postgres backend database|
11 | |docker-compose.mlflow_newpostgres.yaml|stand up a new postgres db and connect mlflow to it for backend (repo default)|
12 | |docker-compose.mlflow_postgres.yaml |stand up a new postgres db, exposing both the db and mlflow ports directly|
13 | |docker-compose.mlflow_postgres_nginx.yaml|stand up a new postgres db, with mlflow served behind an nginx reverse proxy|
14 | |docker-compose.mlflow_sqlite.yaml |stand up a new sqlite db and connect mlflow to it for backend|
15 | |docker-compose.mlflow_mysql.yaml |stand up a new mysql db and connect mlflow to it for backend|
16 | |docker-compose.orig.yaml |original scripts from [Guillaume Androz's 10-Jan-2020 Towards-Data-Science article](https://towardsdatascience.com/deploy-mlflow-with-docker-compose-8059f16b6039) per README.md|
15 |
16 |
17 | To use one of these, just copy it to ../docker-compose.yaml.
18 | Note the repo's default docker-compose.yaml in its root directory essentially
19 | matches docker-compose.mlflow_newpostgres.yaml to begin with (modulo the
20 | artifact-store paths).
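
For example, to switch to the mysql variation (run from the repo root):
```bash
cp compose_variations/docker-compose.mlflow_mysql.yaml docker-compose.yaml
docker compose up -d --build   # or equivalently: make start
```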
20 |
21 |
--------------------------------------------------------------------------------
/compose_variations/docker-compose.mlflow_existingpostgres.yaml:
--------------------------------------------------------------------------------
1 | # This version hides the underlying postgres database on a backend network
2 | # different from the frontend network on which nginx interfaces mlflow.
3 | # And mlflow is hidden behind nginx, allowing user auth to be implemented.
4 | #
5 | # The following environment vars can be set in the shell before running
6 | # docker-compose (defaults shown; the db password comes via the .pgpass file).
7 | # export DB_SERVER=db # defaults to db container; can replace with ip address
8 | # export DB_NAME=mlflow
9 | # export DB_USER=mlflow
10 | # export PGPASS=~/.pgpass # path of .pgpass file containing db pw
11 | # export AWSCREDS=~/.aws # path of AWS credentials files
12 | # export DB_PORT=5432
13 | # export FILESTORE=mlruns_vol # defaults to docker vol; can replace with dir
14 | # export MLFLOW_PORT=5000
15 | # export AWS_S3BUCKETURL=s3://mybucket/myprefix/ # no quotes
16 | # export AWS_REGION=us-west-2 # no quotes
17 | # Those defaults are set automatically - you only need to specify the ones
18 | # you want to change.
19 | #
20 | # Note artifacts can be stored in an AWS S3 bucket, in a docker volume called
21 | # mlruns_vol, or a local drive, via choice of the ARTIFACTS_ROOT variable.
22 |
23 | version: '3.3'
24 |
25 | services:
26 | app:
27 | restart: always
28 | build: ./mlflow
29 | image: mlflow_server
30 | container_name: mlflow_server
31 | expose:
32 | - 5001
33 | environment:
34 | - BACKEND=postgresql://${DB_USER:-mlflow}@${DB_SERVER:-db}:${DB_PORT:-5432}/${DB_NAME:-mlflow}
35 | # - ARTIFACTS_ROOT=/mlruns # for artifact store in locally mapped volume (below)
36 | - ARTIFACTS_ROOT=mlflow-artifacts:/ # for artifact store in AWS S3 (or delete this var)
37 | - ARTIFACTS_DEST=${AWS_S3BUCKETURL}
38 | - AWS_DEFAULT_REGION=${AWS_REGION}
39 | # If using AWS the credentials file(s) must be in the .aws directory mapped below.
40 | volumes:
41 | - ${FILESTORE:-mlruns_vol}:/mlruns # ignored if using S3
42 | - ${PGPASS:-~/.pgpass}:/root/.pgpass # provides pw for mlflow database, outside of container
43 | - ${AWSCREDS:-~/.aws}:/root/.aws # provides AWS creds, outside of container
44 | - condaenv_vol:/opt/conda # provides continuity/speed when looping runs with same container
45 | command:
46 | - sh # (sh form here allows for var substitution of BACKEND and ARTIFACTS vars)
47 | - -c
48 | - mlflow server
49 | --port 5001
50 | --host 0.0.0.0
51 | --backend-store-uri $${BACKEND}
52 | --default-artifact-root $${ARTIFACTS_ROOT}
53 | --artifacts-destination $${ARTIFACTS_DEST}
54 | --serve-artifacts
55 |
56 | nginx:
57 | restart: always
58 | build: ./nginx
59 | image: mlflow_nginx
60 | container_name: mlflow_nginx
61 | ports:
62 | - "${MLFLOW_PORT:-5000}:80"
63 | depends_on:
64 | - app
65 |
66 | volumes:
67 | mlruns_vol:
68 | condaenv_vol:
69 |
--------------------------------------------------------------------------------
/compose_variations/docker-compose.mlflow_mysql.yaml:
--------------------------------------------------------------------------------
1 | # This version exposes the underlying mysql database as well as the mlflow
2 | # server, so we can access the database contents directly.
3 | #
4 | # Have the following environment vars set in shell before running docker-compose
5 | # (suggested values here but can use whatever desired):
6 | # export DB_NAME=mlflowdb
7 | # export DB_USER=mluser
8 | # export DB_PW=
9 | # export DB_ROOTPW=
10 | # export DB_PORT=3306
11 | # export MLFLOW_PORT=5001
12 | #
13 | # AWS S3 bucket can be used instead of local drive for artifacts store via
14 | # commented-out environment lines below.
15 |
16 | version: '3.3'
17 |
18 | services:
19 | db:
20 | restart: always
21 | image: mysql/mysql-server:5.7.28
22 | container_name: mlflow_db
23 | networks:
24 | - mydefault
25 | expose:
26 | - ${DB_PORT}
27 | ports:
28 | - "${DB_PORT}:${DB_PORT}"
29 | environment:
30 | - MYSQL_DATABASE=${DB_NAME}
31 | - MYSQL_USER=${DB_USER}
32 | - MYSQL_PASSWORD=${DB_PW}
33 | - MYSQL_ROOT_PASSWORD=${DB_ROOTPW}
34 | volumes:
35 | - db_datams:/var/lib/mysql
36 |
37 | app:
38 | restart: always
39 | build: ./mlflow
40 | image: mlflow_server
41 | container_name: mlflow_server
42 | networks:
43 | - mydefault
44 | expose:
45 | - ${MLFLOW_PORT}
46 | ports:
47 | - "${MLFLOW_PORT}:${MLFLOW_PORT}"
48 | environment:
49 | - BACKEND=mysql+pymysql://${DB_USER}:${DB_PW}@db:${DB_PORT}/${DB_NAME}
50 | - ARTIFACTS=/mlruns
51 | # For artifact store in AWS S3 (note boto was installed in container):
52 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
53 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
54 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
55 | # - ARTIFACTS="s3://mlflow_bucket/mlflow/"
56 | volumes:
57 | - mlrun_data:/mlruns
58 | command:
59 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS)
60 | - -c
61 | - mlflow server
62 | --port ${MLFLOW_PORT}
63 | --host 0.0.0.0
64 | --backend-store-uri $${BACKEND}
65 | --default-artifact-root $${ARTIFACTS}
66 | depends_on:
67 | - db
68 |
69 | # nginx:
70 | # restart: always
71 | # build: ./nginx
72 | # image: mlflow_nginx
73 | # container_name: mlflow_nginx
74 | # ports:
75 | # - "80:80"
76 | # networks:
77 | # - frontend
78 | # depends_on:
79 | #     - app
80 |
81 | networks:
82 | mydefault:
83 | driver: bridge
84 |
85 | volumes:
86 | db_datams:
87 | mlrun_data:
88 |
--------------------------------------------------------------------------------
/compose_variations/docker-compose.mlflow_newpostgres.yaml:
--------------------------------------------------------------------------------
1 | # This version hides the underlying postgres database on a backend network
2 | # different from the frontend network on which nginx interfaces mlflow.
3 | # And mlflow is hidden behind nginx, allowing user auth to be implemented.
4 | #
5 | # The following environment vars can be set in the shell before running
6 | # docker-compose (defaults shown; db passwords come via ~/.pgadminpw and ~/.pgpass).
7 | # export DB_NAME=mlflow
8 | # export DB_USER=postgres
9 | # export DB_SERVER=db # ie defaults to db container; can replace with ip address
10 | # export PGPASS=~/.pgpass # path of .pgpass file (contains pw)
11 | # export DB_PORT=5432
12 | # export FILESTORE=mlruns_vol # ie defaults to docker volume; can replace with filesys dir
13 | # export MLFLOW_PORT=5000
14 | # Those defaults are set automatically, so you only need to specify the ones
15 | # you want to change; all of them are optional.
16 | #
17 | # Note an AWS S3 bucket can be used instead of local drive for the artifacts
18 | # store, via the commented-out environment lines below.
19 |
20 | version: '3.3'
21 |
22 | services:
23 | # If using external DB_SERVER, comment out this db container
24 | db:
25 | restart: always
26 | image: postgres:13
27 | container_name: mlflow_db
28 | expose:
29 | - ${DB_PORT:-5432}
30 | # networks:
31 | # - backend
32 | environment:
33 | # - MUID=$UID
34 | # - MGID=$GID
35 | - POSTGRES_DB=${DB_NAME:-mlflow}
36 | - POSTGRES_USER=${DB_USER:-postgres}
37 | - POSTGRES_PASSWORD_FILE=/run/secrets/pg_admin_pw
38 | secrets:
39 | - pg_admin_pw
40 | volumes:
41 | - datapg_vol:/var/lib/postgresql/data
42 |
43 | app:
44 | restart: always
45 | build: ./mlflow
46 | image: mlflow_server
47 | container_name: mlflow_server
48 | expose:
49 | - 5001
50 | # networks:
51 | # - frontend
52 | # - backend
53 | environment:
54 | - BACKEND=postgresql://${DB_USER:-postgres}@${DB_SERVER:-db}:${DB_PORT:-5432}/${DB_NAME:-mlflow}
55 | - ARTIFACTS=/mlruns # in-container path to filestore in filesys
56 | # For artifact store in AWS S3 (uses boto that was installed in container):
57 | # Comment out the ARTIFACTS line above and instead use:
58 | # - ARTIFACTS="s3://mlflow_bucket/my_mlflow_dir/"
59 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
60 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
61 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
62 | volumes:
63 | - ${FILESTORE:-mlruns_vol}:/mlruns # can comment out this line if using S3
64 | - ${PGPASS:-~/.pgpass}:/root/.pgpass # provides the pw for BACKEND database
65 | - condaenv_vol:/opt/conda # provides continuity/speed when looping runs with same container
66 | command:
67 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS)
68 | - -c
69 | - mlflow server
70 | --port 5001
71 | --host 0.0.0.0
72 | --backend-store-uri $${BACKEND}
73 | --default-artifact-root $${ARTIFACTS}
74 | # depends_on:
75 | # - db
76 |
77 | nginx:
78 | restart: always
79 | build: ./nginx
80 | image: mlflow_nginx
81 | container_name: mlflow_nginx
82 | ports:
83 | - "${MLFLOW_PORT:-5000}:80"
84 | # networks:
85 | # - frontend
86 | depends_on:
87 | - app
88 |
89 | # networks:
90 | # frontend:
91 | # driver: bridge
92 | # backend:
93 | # driver: bridge
94 |
95 | secrets:
96 | pg_admin_pw:
97 | file: ~/.pgadminpw
98 |
99 | volumes:
100 | mlruns_vol:
101 | datapg_vol:
102 | condaenv_vol:
103 |
--------------------------------------------------------------------------------
/compose_variations/docker-compose.mlflow_postgres.yaml:
--------------------------------------------------------------------------------
1 | # This version exposes the underlying postgres database as well as the mlflow
2 | # server, so we can access the database contents directly.
3 | #
4 | # Have the following environment vars set in shell before running docker-compose
5 | # (suggested values here but can use whatever desired):
6 | # export DB_NAME=mlflowdb
7 | # export DB_USER=postgres
8 | # export DB_PW=
9 | # export DB_PORT=5432
10 | # export MLFLOW_PORT=5001
11 | #
12 | # AWS S3 bucket can be used instead of local drive for artifacts store via
13 | # commented-out environment lines below.
14 |
15 | version: '3.3'
16 |
17 | services:
18 | db:
19 | restart: always
20 | image: postgres:latest
21 | container_name: mlflow_db
22 | expose:
23 | - ${DB_PORT}
24 | networks:
25 | - mydefault
26 | ports:
27 | - "${DB_PORT}:${DB_PORT}"
28 | environment:
29 | - POSTGRES_DB=${DB_NAME}
30 | - POSTGRES_USER=${DB_USER}
31 | - POSTGRES_PASSWORD=${DB_PW}
32 | volumes:
33 | - db_datapg:/var/lib/postgresql/data
34 |
35 | app:
36 | restart: always
37 | build: ./mlflow
38 | image: mlflow_server
39 | container_name: mlflow_server
40 | networks:
41 | - mydefault
42 | expose:
43 | - ${MLFLOW_PORT}
44 | ports:
45 | - "${MLFLOW_PORT}:${MLFLOW_PORT}"
46 | environment:
47 | - BACKEND=postgresql://${DB_USER}:${DB_PW}@db:${DB_PORT}/${DB_NAME}
48 | - ARTIFACTS=/mlruns
49 | # For artifact store in AWS S3 (note boto was installed in container):
50 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
51 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
52 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
53 | # - ARTIFACTS="s3://mlflow_bucket/mlflow/"
54 | volumes:
55 | - mlrun_data:/mlruns
56 |
57 | command:
58 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS)
59 | - -c
60 | - mlflow server
61 | --port ${MLFLOW_PORT}
62 | --host 0.0.0.0
63 | --backend-store-uri $${BACKEND}
64 | --default-artifact-root $${ARTIFACTS}
65 | depends_on:
66 | - db
67 |
68 |
69 |
70 | networks:
71 | mydefault:
72 | driver: bridge
73 |
74 | volumes:
75 | db_datapg:
76 | mlrun_data:
77 |
--------------------------------------------------------------------------------
/compose_variations/docker-compose.mlflow_postgres_nginx.yaml:
--------------------------------------------------------------------------------
1 | # This version hides the underlying postgres database on a backend network
2 | # different from the frontend network on which nginx interfaces mlflow.
3 | # And mlflow is hidden behind nginx, allowing user auth to be implemented.
4 | #
5 | # Have the following environment vars set in shell before running docker-compose
6 | # (suggested values here but can use whatever desired):
7 | # export DB_NAME=mlflowdb
8 | # export DB_USER=postgres
9 | # export DB_PW=
10 | # export DB_PORT=5432
11 | # export MLFLOW_PORT=5000
12 | #
13 | # AWS S3 bucket can be used instead of local drive for artifacts store via
14 | # commented-out environment lines below.
15 |
16 | version: '3.3'
17 |
18 | services:
19 | db:
20 | restart: always
21 | image: postgres:latest
22 | container_name: mlflow_db
23 | expose:
24 | - ${DB_PORT}
25 | networks:
26 | - backend
27 | environment:
28 | - MUID=$UID
29 | - MGID=$GID
30 | - POSTGRES_DB=${DB_NAME}
31 | - POSTGRES_USER=${DB_USER}
32 | - POSTGRES_PASSWORD=${DB_PW}
33 | volumes:
34 | - db_datapg:/var/lib/postgresql/data
35 |
36 | app:
37 | restart: always
38 | build: ./mlflow
39 | image: mlflow_server
40 | container_name: mlflow_server
41 | expose:
42 | - 5001
43 | networks:
44 | - frontend
45 | - backend
46 | environment:
47 | - BACKEND=postgresql://${DB_USER}:${DB_PW}@db:${DB_PORT}/${DB_NAME}
48 | - ARTIFACTS=/mlruns
49 | # For artifact store in AWS S3 (note boto was installed in container):
50 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
51 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
52 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
53 | # - ARTIFACTS="s3://mlflow_bucket/mlflow/"
54 | volumes:
55 | - mlrun_data:/mlruns
56 |
57 | command:
58 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS)
59 | - -c
60 | - mlflow server
61 | --port 5001
62 | --host 0.0.0.0
63 | --backend-store-uri $${BACKEND}
64 | --default-artifact-root $${ARTIFACTS}
65 | depends_on:
66 | - db
67 |
68 | nginx:
69 | restart: always
70 | build: ./nginx
71 | image: mlflow_nginx
72 | container_name: mlflow_nginx
73 | ports:
74 | - "${MLFLOW_PORT}:80"
75 | networks:
76 | - frontend
77 | depends_on:
78 | - app
79 |
80 | networks:
81 | frontend:
82 | driver: bridge
83 | backend:
84 | driver: bridge
85 |
86 | volumes:
87 | db_datapg:
88 | mlrun_data:
89 |
--------------------------------------------------------------------------------
/compose_variations/docker-compose.mlflow_sqlite.yaml:
--------------------------------------------------------------------------------
1 | # Have the following environment vars set in shell before running docker-compose
2 | # (suggested values here but can use whatever desired):
3 | # export DB_NAME=mlflowdb
4 | # export MLFLOW_PORT=5001
5 |
6 | version: '3.3'
7 |
8 | services:
9 | app:
10 | restart: always
11 | build: ./mlflow
12 | image: mlflow_server
13 | container_name: mlflow_server
14 | # networks:
15 | # - frontend
16 | expose:
17 | - ${MLFLOW_PORT}
18 | ports:
19 | - "${MLFLOW_PORT}:${MLFLOW_PORT}"
20 | environment:
21 | - BACKEND=sqlite:///db/${DB_NAME}.db
22 | - ARTIFACTS=/mlruns
23 | # For artifact store in AWS S3 (note boto was installed in container):
24 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
25 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
26 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
27 | # - ARTIFACTS="s3://mlflow_bucket/mlflow/"
28 | volumes:
29 | - mlrun_data:/mlruns
30 | - sqlitedb_data:/db
31 |
32 | command:
33 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS)
34 | - -c
35 | - mlflow server
36 | --port ${MLFLOW_PORT}
37 | --host 0.0.0.0
38 | --backend-store-uri $${BACKEND}
39 | --default-artifact-root $${ARTIFACTS}
40 |
41 | # still useful if we tweak to handle htpasswd basic authentication to gate users
42 | # nginx:
43 | # restart: always
44 | # build: ./nginx
45 | # image: mlflow_nginx
46 | # container_name: mlflow_nginx
47 | # ports:
48 | # - "80:80"
49 | # networks:
50 | # - frontend
51 | # - backend
52 | # depends_on:
53 | # - app
54 |
55 | # networks:
56 | # frontend:
57 | # driver: bridge
58 | # backend:
59 | # driver: bridge
60 |
61 | volumes:
62 | mlrun_data:
63 | sqlitedb_data:
64 |
--------------------------------------------------------------------------------
/compose_variations/docker-compose.orig.yaml:
--------------------------------------------------------------------------------
1 | version: '3.3'
2 |
3 | services:
4 | db:
5 | restart: always
6 | image: mysql/mysql-server:5.7.28
7 | container_name: mlflow_db
8 | expose:
9 | - "3306"
10 | networks:
11 | - backend
12 | environment:
13 | - MYSQL_DATABASE=${MYSQL_DATABASE}
14 | - MYSQL_USER=${MYSQL_USER}
15 | - MYSQL_PASSWORD=${MYSQL_PASSWORD}
16 | - MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD}
17 | volumes:
18 | - dbdata:/var/lib/mysql
19 |
20 | web:
21 | restart: always
22 | build: ./mlflow
23 | image: mlflow_server
24 | container_name: mlflow_server
25 | expose:
26 | - "5000"
27 | networks:
28 | - frontend
29 | - backend
30 | environment:
31 | - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
32 | - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
33 | - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
34 | command: mlflow server --backend-store-uri mysql+pymysql://${MYSQL_USER}:${MYSQL_PASSWORD}@db:3306/${MYSQL_DATABASE} --default-artifact-root s3://mlflow_bucket/mlflow/ --host 0.0.0.0
35 |
36 | nginx:
37 | restart: always
38 | build: ./nginx
39 | image: mlflow_nginx
40 | container_name: mlflow_nginx
41 | ports:
42 | - "80:80"
43 | networks:
44 | - frontend
45 | depends_on:
46 | - web
47 |
48 | networks:
49 | frontend:
50 | driver: bridge
51 | backend:
52 | driver: bridge
53 |
54 | volumes:
55 | dbdata:
56 |
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | # This version hides the underlying postgres database on a backend network
2 | # different from the frontend network on which nginx interfaces mlflow.
3 | # And mlflow is hidden behind nginx, allowing user auth to be implemented.
4 | #
5 | # The following environment vars can be set in the shell before running
6 | # docker-compose (defaults shown; db passwords come via ~/.pgadminpw and ~/.pgpass).
7 | # export DB_NAME=mlflow
8 | # export DB_USER=postgres
9 | # export DB_SERVER=db # ie defaults to db container; can replace with ip address
10 | # export PGPASS=~/.pgpass # path of .pgpass file (contains pw)
11 | # export DB_PORT=5432
12 | # export FILESTORE=/storage/mlruns # ie defaults to this host dir; can replace with another path
13 | # export MLFLOW_PORT=5000
14 | # Those defaults are set automatically, so you only need to specify the ones
15 | # you want to change; all of them are optional.
16 | #
17 | # Note an AWS S3 bucket can be used instead of local drive for the artifacts
18 | # store, via the commented-out environment lines below.
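#
# Example (the RDS hostname below is just a placeholder): to point mlflow at an
# existing external database instead of the bundled db container (which could
# then be commented out below):
#   export DB_SERVER=mydatabaseserver.abcdefghij.us-west-2.rds.amazonaws.com
#   export DB_USER=mlflow
#   make start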
19 |
20 | version: '3.3'
21 |
22 | services:
23 | db:
24 | restart: always
25 | image: postgres:13
26 | container_name: mlflow_db
27 | expose:
28 | - ${DB_PORT:-5432}
29 | # networks:
30 | # - backend
31 | environment:
32 | # - MUID=$UID
33 | # - MGID=$GID
34 | - POSTGRES_DB=${DB_NAME:-mlflow}
35 | - POSTGRES_USER=${DB_USER:-postgres}
36 | - POSTGRES_PASSWORD_FILE=/run/secrets/pg_admin_pw
37 | secrets:
38 | - pg_admin_pw
39 | volumes:
40 | - datapg_vol:/var/lib/postgresql/data
41 |
42 | app:
43 | restart: always
44 | build: ./mlflow
45 | image: mlflow_server
46 | container_name: mlflow_server
47 | expose:
48 | - 5001
49 | # networks:
50 | # - frontend
51 | # - backend
52 | environment:
53 | - BACKEND=postgresql://${DB_USER:-postgres}@${DB_SERVER:-db}:${DB_PORT:-5432}/${DB_NAME:-mlflow}
54 | - ARTIFACTS=/storage/mlruns # in-container path to filestore in filesys
55 | # For artifact store in AWS S3 (uses boto that was installed in container):
56 | # Comment out the ARTIFACTS line above and instead use:
57 | # - ARTIFACTS="s3://mlflow_bucket/my_mlflow_dir/"
58 | # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
59 | # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
60 | # - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
61 | volumes:
62 | - ${FILESTORE:-/storage/mlruns}:/storage/mlruns # can comment out this line if using S3
63 | - ${PGPASS:-~/.pgpass}:/root/.pgpass # provides the pw for BACKEND database
64 | - condaenv_vol:/opt/conda # provides continuity/speed when looping runs with same container
65 | command:
66 | - sh # (sh allows for var substitution of BACKEND and ARTIFACTS)
67 | - -c
68 | - mlflow server
69 | --port 5001
70 | --host 0.0.0.0
71 | --backend-store-uri $${BACKEND}
72 | --default-artifact-root $${ARTIFACTS}
73 | # depends_on:
74 | # - db
75 |
76 | nginx:
77 | restart: always
78 | build: ./nginx
79 | image: mlflow_nginx
80 | container_name: mlflow_nginx
81 | ports:
82 | - "${MLFLOW_PORT:-5000}:80"
83 | # networks:
84 | # - frontend
85 | depends_on:
86 | - app
87 |
88 | # networks:
89 | # frontend:
90 | # driver: bridge
91 | # backend:
92 | # driver: bridge
93 |
94 | secrets:
95 | pg_admin_pw:
96 | file: ~/.pgadminpw
97 |
98 | volumes:
99 | mlruns_vol:
100 | datapg_vol:
101 | condaenv_vol:
102 |
--------------------------------------------------------------------------------
/mlflow/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3
2 | RUN apt-get update && apt-get install -y git
3 |
4 | RUN conda install -c conda-forge shap # note shap's binary pkg not available in pip
5 | RUN pip install mlflow psycopg2-binary pymysql boto3
6 |
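# clone the mlflow team's example project, used by the makefile's mlflowpopulate target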
7 | RUN cd /home && git clone https://github.com/mlflow/mlflow-example.git
8 |
--------------------------------------------------------------------------------
/nginx/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nginx:1.17.6
2 | # Remove default Nginx config
3 | RUN rm /etc/nginx/nginx.conf
4 | # Copy the modified Nginx conf
5 | COPY nginx.conf /etc/nginx
6 | # Copy proxy config
7 | COPY mlflow.conf /etc/nginx/sites-enabled/
8 |
--------------------------------------------------------------------------------
/nginx/mlflow.conf:
--------------------------------------------------------------------------------
1 | # Define the parameters for a specific virtual host/server
2 | server {
3 | # Define the server name, IP address, and/or port of the server
4 | listen 80;
5 |
6 | # Define the specified charset to the “Content-Type” response header field
7 | charset utf-8;
8 |
9 | # Configure NGINX to reverse proxy HTTP requests to the upstream mlflow server
10 | location / {
11 | # Define the location of the proxy server to send the request to
12 | proxy_pass http://app:5001;
13 |
14 | # Redefine the header fields that NGINX sends to the upstream server
15 | proxy_set_header Host $host;
16 | proxy_set_header X-Real-IP $remote_addr;
17 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
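
        # Basic auth could be enabled here -- a sketch only, since no htpasswd
        # file ships with this repo (create one and COPY it in via nginx/Dockerfile):
        # auth_basic           "MLflow";
        # auth_basic_user_file /etc/nginx/.htpasswd;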
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/nginx/nginx.conf:
--------------------------------------------------------------------------------
1 | # Define the user that will own and run the Nginx server
2 | user nginx;
3 | # Define the number of worker processes; recommended value is the number of
4 | # cores that are being used by your server
5 | worker_processes 1;
6 |
7 | # Define the location on the file system of the error log, plus the minimum
8 | # severity to log messages for
9 | error_log /var/log/nginx/error.log warn;
10 | # Define the file that will store the process ID of the main NGINX process
11 | pid /var/run/nginx.pid;
12 |
13 |
14 | # events block defines the parameters that affect connection processing.
15 | events {
16 | # Define the maximum number of simultaneous connections that can be opened by a worker process
17 | worker_connections 1024;
18 | }
19 |
20 |
21 | # http block defines the parameters for how NGINX should handle HTTP web traffic
22 | http {
23 | # Include the file defining the list of file types that are supported by NGINX
24 | include /etc/nginx/mime.types;
25 | # Define the default file type that is returned to the user
26 | default_type text/html;
27 |
28 | # Define the format of log messages.
29 | log_format main '$remote_addr - $remote_user [$time_local] "$request" '
30 | '$status $body_bytes_sent "$http_referer" '
31 | '"$http_user_agent" "$http_x_forwarded_for"';
32 |
33 | # Define the location of the log of access attempts to NGINX
34 | access_log /var/log/nginx/access.log main;
35 |
36 | # Define the parameters to optimize the delivery of static content
37 | sendfile on;
38 | tcp_nopush on;
39 | tcp_nodelay on;
40 |
41 | # Define the timeout value for keep-alive connections with the client
42 | keepalive_timeout 65;
43 |
44 | # Define the usage of the gzip compression algorithm to reduce the amount of data to transmit
45 | #gzip on;
46 |
47 | # Include additional parameters for virtual host(s)/server(s)
48 | include /etc/nginx/sites-enabled/*.conf;
49 | }
50 |
--------------------------------------------------------------------------------