├── .dockerignore
├── .env
├── .github
│   └── workflows
│       └── verify-docker-compose.yml
├── .gitignore
├── LICENSE
├── README.md
├── bashrc_generate.sh
├── bashrc_install.sh
├── create_bucket.py
├── docker-compose.yml
├── quickstart
│   └── mlflow_tracking.py
├── run_create_bucket.sh
└── test_experiment
    ├── Dockerfile
    └── mlflow_tracking.py

/.dockerignore:
--------------------------------------------------------------------------------
.git
node_modules
--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
AWS_ACCESS_KEY_ID=admin
AWS_SECRET_ACCESS_KEY=sample_key
AWS_REGION=us-east-1
AWS_BUCKET_NAME=mlflow
MYSQL_DATABASE=mlflow
MYSQL_USER=mlflow_user
MYSQL_PASSWORD=mlflow_password
MYSQL_ROOT_PASSWORD=toor
MLFLOW_S3_ENDPOINT_URL=http://localhost:9000
MLFLOW_TRACKING_URI=http://localhost:5000
--------------------------------------------------------------------------------
/.github/workflows/verify-docker-compose.yml:
--------------------------------------------------------------------------------
name: VerifyDockerCompose
on: push
jobs:
  verify:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Show the config
        run: docker compose config
      - name: Run
        run: docker compose up -d --build
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# mac
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

node_modules/
.localstack/
dbdata/
s3/
outputs/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 Tomasz Dłuski

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MLflow Docker Setup [![Actions Status](https://github.com/Toumash/mlflow-docker/workflows/VerifyDockerCompose/badge.svg)](https://github.com/Toumash/mlflow-docker/actions)

> If you want to boot up an MLflow stack with a one-liner, this repo is for you.
> The only requirement is Docker installed on your system; we are going to use Bash on Linux/Windows.

# 🚀 1-2-3! Setup guide
1. Configure the `.env` file to your liking. You can put anything you like there; it is used to configure your services.
2. Run `docker compose up`
3. Open http://localhost:5000 for MLflow, and http://localhost:9001/ to browse your files in the S3 artifact store

**👇 Video tutorial on how to set it up + BONUS with Microsoft Azure 👇**

[![Youtube tutorial](https://user-images.githubusercontent.com/9840635/144674240-f1ede224-410a-4b77-a7b8-450f45cc79ba.png)](https://www.youtube.com/watch?v=ma5lA19IJRA)

# Features
- One-file setup (.env)
- MinIO S3 artifact store with GUI
- MySQL MLflow storage
- Ready-to-use bash scripts for Python development!
- Automatically created S3 buckets
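
If you want to script step 3 instead of opening a browser, here is a minimal sketch (the file name `health_check.py` is hypothetical and not part of this repo; the URLs assume the default ports from `.env`, and the paths rely on MLflow exposing `/health` and MinIO exposing `/minio/health/live` — adjust if your versions differ):

```python
# health_check.py -- hypothetical helper, not shipped with this repo.
# URLs assume the default ports configured in .env.
import urllib.request

SERVICES = {
    "mlflow": "http://localhost:5000/health",
    "minio": "http://localhost:9000/minio/health/live",
}

for name, url in SERVICES.items():
    try:
        # urlopen raises on connection errors and non-2xx responses alike
        with urllib.request.urlopen(url, timeout=5) as resp:
            print(f"[ OK ] {name} answered with HTTP {resp.status}")
    except Exception as exc:
        print(f"[ !! ] {name} not reachable at {url}: {exc}")
```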

## How to use in ML development in Python

<details>
  <summary>Click to show</summary>

1. Configure your client side

To run MLflow projects you need several environment variables set on the client side. To generate them, use the convenience script `./bashrc_install.sh`, which installs them on your system, or `./bashrc_generate.sh`, which just displays the config for you to copy & paste.

> $ ./bashrc_install.sh
> [ OK ] Successfully installed environment variables into your .bashrc!

The script installs these variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `MLFLOW_S3_ENDPOINT_URL`, `MLFLOW_TRACKING_URI`. All of them are needed to use MLflow from the client side.

2. Test the pipeline with the command below (it uses conda). If you don't have conda installed, run with `--no-conda`:

```shell
mlflow run git@github.com:databricks/mlflow-example.git -P alpha=0.5
# or
python ./quickstart/mlflow_tracking.py
```

3. *(Optional)* If you are constantly switching environments, you can use this inline environment-variable syntax:

```shell
MLFLOW_S3_ENDPOINT_URL=http://localhost:9000 MLFLOW_TRACKING_URI=http://localhost:5000 mlflow run git@github.com:databricks/mlflow-example.git -P alpha=0.5
```

</details>
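
If you would rather configure the client from inside Python than via `.bashrc`, a minimal sketch (the credential values are just the defaults from this repo's `.env`; substitute your own):

```python
# Hypothetical client-side snippet; values taken from the defaults in .env.
import os

os.environ["AWS_ACCESS_KEY_ID"] = "admin"
os.environ["AWS_SECRET_ACCESS_KEY"] = "sample_key"
# Read by the S3 artifact client when artifacts are uploaded.
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "http://localhost:9000"

import mlflow

mlflow.set_tracking_uri("http://localhost:5000")

with mlflow.start_run():
    mlflow.log_metric("smoke_test", 1.0)
    # Exercises the MinIO artifact store as well as the tracking server.
    mlflow.log_text("hello from the client", "greeting.txt")
```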

## Licensing
Copyright (c) 2021 Tomasz Dłuski

Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License by reviewing the file [LICENSE](./LICENSE) in the repository.
--------------------------------------------------------------------------------
/bashrc_generate.sh:
--------------------------------------------------------------------------------
#!/bin/bash

source .env

echo "Copy and paste the configuration below into your ~/.bashrc file!"
echo ""
echo "# MLFLOW CONFIG"
echo "export AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID"
echo "export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY"
echo "export MLFLOW_S3_ENDPOINT_URL=$MLFLOW_S3_ENDPOINT_URL"
echo "export MLFLOW_TRACKING_URI=$MLFLOW_TRACKING_URI"
echo "# END MLFLOW CONFIG"
--------------------------------------------------------------------------------
/bashrc_install.sh:
--------------------------------------------------------------------------------
#!/bin/bash

source .env

echo "# MLFLOW CONFIG" >> ~/.bashrc
echo "export AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID" >> ~/.bashrc
echo "export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" >> ~/.bashrc
echo "export MLFLOW_S3_ENDPOINT_URL=$MLFLOW_S3_ENDPOINT_URL" >> ~/.bashrc
echo "export MLFLOW_TRACKING_URI=$MLFLOW_TRACKING_URI" >> ~/.bashrc
echo "# END MLFLOW CONFIG" >> ~/.bashrc

echo "[ OK ] Successfully installed environment variables into your .bashrc!"
--------------------------------------------------------------------------------
/create_bucket.py:
--------------------------------------------------------------------------------
import os

from minio import Minio

accessID = os.environ.get('AWS_ACCESS_KEY_ID')
accessSecret = os.environ.get('AWS_SECRET_ACCESS_KEY')
minioUrl = os.environ.get('MLFLOW_S3_ENDPOINT_URL')
bucketName = os.environ.get('AWS_BUCKET_NAME')

if accessID is None:
    print('[!] AWS_ACCESS_KEY_ID environment variable is empty! run \'source .env\' to load it from the .env file')
    exit(1)

if accessSecret is None:
    print('[!] AWS_SECRET_ACCESS_KEY environment variable is empty! run \'source .env\' to load it from the .env file')
    exit(1)

if minioUrl is None:
    print('[!] MLFLOW_S3_ENDPOINT_URL environment variable is empty! run \'source .env\' to load it from the .env file')
    exit(1)

if bucketName is None:
    print('[!] AWS_BUCKET_NAME environment variable is empty! run \'source .env\' to load it from the .env file')
    exit(1)

# Strip the scheme: the Minio client expects a bare host:port, e.g. "localhost:9000".
minioUrlHostWithPort = minioUrl.split('//')[1]
print('[*] minio url: ', minioUrlHostWithPort)

s3Client = Minio(
    minioUrlHostWithPort,
    access_key=accessID,
    secret_key=accessSecret,
    secure=False
)

# Creating a bucket that already exists raises an error, so check first.
if not s3Client.bucket_exists(bucketName):
    s3Client.make_bucket(bucketName)
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: "3.9"
services:
  s3:
    image: minio/minio:RELEASE.2023-11-01T18-37-25Z
    restart: unless-stopped
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      - MINIO_ROOT_USER=${AWS_ACCESS_KEY_ID}
      - MINIO_ROOT_PASSWORD=${AWS_SECRET_ACCESS_KEY}
    command: server /data --console-address ":9001"
    networks:
      - internal
      - public
    volumes:
      - minio_new_volume:/data
  db:
    image: mysql:8-oracle # the -oracle tag supports the arm64 architecture!
    restart: unless-stopped
    container_name: mlflow_db
    expose:
      - "3306"
    environment:
      - MYSQL_DATABASE=${MYSQL_DATABASE}
      - MYSQL_USER=${MYSQL_USER}
      - MYSQL_PASSWORD=${MYSQL_PASSWORD}
      - MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD}
    volumes:
      - db_new_volume:/var/lib/mysql
    networks:
      - internal
  mlflow:
    image: ubuntu/mlflow:2.1.1_1.0-22.04
    container_name: tracker_mlflow
    restart: unless-stopped
    ports:
      - "5000:5000"
    environment:
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      - AWS_DEFAULT_REGION=${AWS_REGION}
      - MLFLOW_S3_ENDPOINT_URL=http://s3:9000
    networks:
      - public
      - internal
    entrypoint: mlflow server --backend-store-uri mysql+pymysql://${MYSQL_USER}:${MYSQL_PASSWORD}@db:3306/${MYSQL_DATABASE} --default-artifact-root s3://${AWS_BUCKET_NAME}/ --artifacts-destination s3://${AWS_BUCKET_NAME}/ -h 0.0.0.0
    depends_on:
      wait-for-db:
        condition: service_completed_successfully
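  # The three services below are one-shot helpers rather than long-running
  # servers: create_s3_buckets retries until MinIO accepts the credentials and
  # then creates the artifact bucket; wait-for-db blocks the mlflow service
  # until MySQL accepts TCP connections; run_test_experiment logs a smoke-test
  # run against the freshly started tracking server.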
  create_s3_buckets:
    image: minio/mc
    depends_on:
      - "s3"
    entrypoint: >
      /bin/sh -c "
      until (/usr/bin/mc alias set minio http://s3:9000 '${AWS_ACCESS_KEY_ID}' '${AWS_SECRET_ACCESS_KEY}') do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc mb minio/${AWS_BUCKET_NAME};
      exit 0;
      "
    networks:
      - internal
  wait-for-db:
    image: atkrad/wait4x
    depends_on:
      - db
    command: tcp db:3306 -t 90s -i 250ms
    networks:
      - internal
  run_test_experiment:
    build:
      context: ./test_experiment
      dockerfile: Dockerfile
    platform: linux/amd64 # once the continuumio/miniconda3:latest image works on native aarch64 (arm), remove this line
    depends_on:
      - "mlflow"
    environment:
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      - AWS_DEFAULT_REGION=${AWS_REGION}
      - MLFLOW_S3_ENDPOINT_URL=http://s3:9000
      - MLFLOW_TRACKING_URI=http://mlflow:5000
    entrypoint: >
      /bin/sh -c "
      python3 mlflow_tracking.py;
      exit 0;
      "
    networks:
      - internal
networks:
  internal:
  public:
    driver: bridge
volumes:
  db_new_volume:
  minio_new_volume:
--------------------------------------------------------------------------------
/quickstart/mlflow_tracking.py:
--------------------------------------------------------------------------------
import os
from random import random, randint

import mlflow

if __name__ == "__main__":
    # Point the client at the tracking server before starting the run;
    # otherwise the run is created against the default local file store.
    mlflow.set_tracking_uri('http://localhost:5000')

    with mlflow.start_run() as run:
        print("Running mlflow_tracking.py")

        mlflow.log_param("param1", randint(0, 100))

        mlflow.log_metric("foo", random())
        mlflow.log_metric("foo", random() + 1)
        mlflow.log_metric("foo", random() + 2)

        if not os.path.exists("outputs"):
            os.makedirs("outputs")
        with open("outputs/test.txt", "w") as f:
            f.write("hello world!")

        mlflow.log_artifacts("outputs")
--------------------------------------------------------------------------------
/run_create_bucket.sh:
--------------------------------------------------------------------------------
#!/bin/bash

set -o allexport; source .env; set +o allexport

pip3 install minio
python3 ./create_bucket.py
--------------------------------------------------------------------------------
/test_experiment/Dockerfile:
--------------------------------------------------------------------------------
FROM continuumio/miniconda3:latest

RUN pip install mlflow boto3

WORKDIR /app
COPY . .
--------------------------------------------------------------------------------
/test_experiment/mlflow_tracking.py:
--------------------------------------------------------------------------------
import os
from random import random, randint

import mlflow

if __name__ == "__main__":
    # Point the client at the tracking server before starting the run;
    # otherwise the run is created against the default local file store.
    mlflow.set_tracking_uri('http://mlflow:5000')

    with mlflow.start_run() as run:
        print("Running mlflow_tracking.py")

        mlflow.log_param("param1", randint(0, 100))

        mlflow.log_metric("foo", random())
        mlflow.log_metric("foo", random() + 1)
        mlflow.log_metric("foo", random() + 2)

        if not os.path.exists("outputs"):
            os.makedirs("outputs")
        with open("outputs/test.txt", "w") as f:
            f.write("hello world!")

        mlflow.log_artifacts("outputs")
--------------------------------------------------------------------------------