├── .dockerignore ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── build_branch.yml │ ├── build_tag.yml │ └── terraform_validate.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── app.json ├── docker-compose.yml ├── docs ├── diagrams.drawio └── images │ ├── architecture_mlflow.png │ └── mlflow_ui.png ├── heroku.yml ├── nginx.conf ├── requirements.txt ├── scripts ├── mlflow.sh └── nginx.sh ├── supervisord.conf └── terraform ├── README.md ├── iam.tf ├── locals.tf ├── network.tf ├── outputs.tf ├── providers.tf ├── rds.tf ├── s3.tf ├── s3 ├── main.tf ├── outputs.tf └── variables.tf ├── server.tf └── variables.tf /.dockerignore: -------------------------------------------------------------------------------- 1 | .github/ 2 | docs/ 3 | terraform/ 4 | .env 5 | 6 | # Common 7 | README.md 8 | CHANGELOG.md 9 | docker-compose.yml 10 | Dockerfile 11 | 12 | # git 13 | .git 14 | .gitattributes 15 | .gitignore -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @DougTrajano 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.github/workflows/build_branch.yml: -------------------------------------------------------------------------------- 1 | name: build_branch 2 | 3 | on: 4 | push: 5 | branches: 6 | - '*' 7 | paths-ignore: 8 | - 'docs/**' 9 | - 'terraform/**' 10 | 11 | defaults: 12 | run: 13 | shell: bash 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v2 22 | 23 | - name: prepare 24 | id: prep 25 | run: | 26 | if [[ $GITHUB_REF == refs/heads/* ]]; then 27 | VERSION=${GITHUB_REF#refs/heads/} 28 | fi 29 | echo ::set-output name=version::${VERSION} 30 | 31 | - name: Configure AWS credentials 32 | id: configure-aws-credentials 33 | uses: aws-actions/configure-aws-credentials@v1 34 | with: 35 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 36 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | aws-region: us-east-1 38 | 39 | - name: Login to Amazon ECR 40 | id: login-ecr 41 | uses: aws-actions/amazon-ecr-login@v1 42 | 43 | - name: Build, tag, and push image to Amazon ECR 44 | id: build-image 45 | env: 46 | ECR_REGISTRY: public.ecr.aws/t9j8s4z8 47 | ECR_REPOSITORY: mlflow 48 | IMAGE_TAG: ${{ steps.prep.outputs.version }} 49 | run: | 50 | aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $ECR_REGISTRY 51 | docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . 
52 | docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG -------------------------------------------------------------------------------- /.github/workflows/build_tag.yml: -------------------------------------------------------------------------------- 1 | name: build_tag 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | paths-ignore: 8 | - 'docs/**' 9 | - 'terraform/**' 10 | 11 | defaults: 12 | run: 13 | shell: bash 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v2 22 | 23 | - name: prepare 24 | id: prep 25 | run: | 26 | if [[ $GITHUB_REF == refs/tags/* ]]; then 27 | VERSION=${GITHUB_REF#refs/tags/} 28 | fi 29 | echo ::set-output name=version::${VERSION} 30 | 31 | - name: Configure AWS credentials 32 | id: configure-aws-credentials 33 | uses: aws-actions/configure-aws-credentials@v1 34 | with: 35 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 36 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | aws-region: us-east-1 38 | 39 | - name: Login to Amazon ECR 40 | id: login-ecr 41 | uses: aws-actions/amazon-ecr-login@v1 42 | 43 | - name: Build, tag, and push image to Amazon ECR 44 | id: build-image 45 | env: 46 | ECR_REGISTRY: public.ecr.aws/t9j8s4z8 47 | ECR_REPOSITORY: mlflow 48 | IMAGE_TAG: ${{ steps.prep.outputs.version }} 49 | run: | 50 | aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $ECR_REGISTRY 51 | docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . 52 | docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG 53 | docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:latest . 
54 | docker push $ECR_REGISTRY/$ECR_REPOSITORY:latest -------------------------------------------------------------------------------- /.github/workflows/terraform_validate.yml: -------------------------------------------------------------------------------- 1 | name: validate-terraform 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | validate: 13 | runs-on: ubuntu-latest 14 | name: Validate terraform 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v2 18 | 19 | - name: terraform validate 20 | uses: dflook/terraform-validate@v1 21 | with: 22 | path: terraform -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Terraform template 3 | # Local .terraform directories 4 | **/.terraform/* 5 | 6 | # .tfstate files 7 | *.tfstate 8 | *.tfstate.* 9 | 10 | # Crash log files 11 | crash.log 12 | 13 | # Exclude all .tfvars files, which are likely to contain sentitive data, such as 14 | # password, private keys, and other secrets. These should not be part of version 15 | # control as they are data points which are potentially sensitive and subject 16 | # to change depending on the environment. 
17 | # 18 | *.tfvars 19 | 20 | # Ignore override files as they are usually used to override resources locally and so 21 | # are not checked in 22 | override.tf 23 | override.tf.json 24 | *_override.tf 25 | *_override.tf.json 26 | *.lock.hcl 27 | 28 | # Include override files you do wish to add to version control using negated pattern 29 | # 30 | # !example_override.tf 31 | 32 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 33 | # example: *tfplan* 34 | 35 | # Ignore CLI configuration files 36 | .terraformrc 37 | terraform.rc 38 | 39 | ### Example user template template 40 | ### Example user template 41 | 42 | # IntelliJ project files 43 | .idea 44 | *.iml 45 | out 46 | gen 47 | 48 | *.env -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8-slim 2 | 3 | WORKDIR /app 4 | 5 | COPY . /app/ 6 | 7 | RUN set -x && \ 8 | apt-get update && \ 9 | apt-get install --no-install-recommends --no-install-suggests -y \ 10 | supervisor gettext-base nginx apache2-utils 11 | 12 | COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf 13 | 14 | # We are getting the nginx.conf in the /scripts/nginx.sh file 15 | # COPY nginx.conf /etc/nginx/conf.d/default.conf 16 | 17 | # install pip then packages 18 | RUN pip install --upgrade pip && \ 19 | pip install -r requirements.txt --upgrade 20 | 21 | # Make scripts executable and run env-vars.sh 22 | RUN chmod +x /app/scripts/mlflow.sh && \ 23 | chmod +x /app/scripts/nginx.sh 24 | 25 | EXPOSE ${PORT} 26 | 27 | # WWW (nginx) 28 | RUN addgroup -gid 1000 www && \ 29 | adduser -uid 1000 -H -D -s /bin/sh -G www www 30 | 31 | ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] 32 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [MLflow](https://www.mlflow.org/) with basic auth 2 | 3 | This project is **ARCHIVED** and it will not receive maintenance anymore because the MLflow **finally** supports basic authentication natively! \o/ 4 | 5 | ```yaml 6 | version: "3.9" 7 | 8 | services: 9 | mlflow: 10 | image: ghcr.io/mlflow/mlflow:v2.5.0 11 | ports: 12 | - 5000:5000 13 | command: mlflow server --host 0.0.0.0 --app-name basic-auth 14 | ``` 15 | 16 | Find further details in [MLflow Authentication — MLflow 2.5.0 documentation](https://mlflow.org/docs/latest/auth/index.html). 
17 | 18 | --- 19 | 20 | ![](https://img.shields.io/badge/MLflow-0077B5?style=for-the-badge&logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjQiIGhlaWdodD0iMjUiIHZpZXdCb3g9IjAgMCAyNCAyNSIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHBhdGggZD0iTTE4Ljk1MDkgMi4xNzYzNEMxNC4wNjM1IC0xLjI0MzQ1IDcuNDA5MjYgLTAuNTcxMDUxIDMuMzA0ODUgMy43NTczNUMtMC43OTk1NTYgOC4wODU3NiAtMS4xMTc3NiAxNC43NjYzIDIuNTU2NjcgMTkuNDY1Mkw2LjIyNTI0IDE2Ljc3MjRDNC40MDQyMyAxNC41MTM0IDQuMDM0MzcgMTEuNDEyNCA1LjI3Mjk4IDguNzg4NDNDNi41MTE1OSA2LjE2NDQ4IDkuMTQwOCA0LjQ3OTE0IDEyLjA0MjIgNC40NDkyOUwxMS45NTU0IDcuMzE1ODFMMTguOTUwOSAyLjE3NjM0WiIgZmlsbD0iIzQzQzlFRCIvPgo8cGF0aCBkPSJNMjEuNjYzOSA0Ljg1MTc2QzIxLjU0MjMgNC42ODM4MiAyMS40MTQ5IDQuNTE4NzggMjEuMjg0NiA0LjM1OTUzTDE3Ljc1MjEgNi45NjU0NkMxOS43NDYzIDkuMTcyMzIgMjAuMjYzNyAxMi4zNDI0IDE5LjA3NDYgMTUuMDY4OEMxNy44ODU2IDE3Ljc5NTIgMTUuMjEwNCAxOS41NzI4IDEyLjIzNjIgMTkuNjEyOUwxMi4zMjMxIDE2Ljc0OTNMNS4yNDk0NSAyMS45NjExQzEwLjA5NzEgMjUuMjI1NSAxNi41ODk2IDI0LjUzMzcgMjAuNjQwNiAyMC4zMjEzQzI0LjY5MTcgMTYuMTA4OSAyNS4xMjk1IDkuNTk0MzcgMjEuNjc4NCA0Ljg3NzgyTDIxLjY2MzkgNC44NTE3NloiIGZpbGw9IiMwMTk0RTIiLz4KPC9zdmc+Cg==&logoColor=white) 21 | ![AWS](https://img.shields.io/badge/AWS-%23FF9900.svg?style=for-the-badge&logo=amazon-aws&logoColor=white) 22 | ![Heroku](https://img.shields.io/badge/heroku-%23430098.svg?style=for-the-badge&logo=heroku&logoColor=white) 23 | ![](https://img.shields.io/badge/Docker-2CA5E0?style=for-the-badge&logo=docker&logoColor=white) 24 | 25 | A dockerized MLflow Tracking Server with basic auth (username and password). 26 | 27 | You will have three options to deploy the server: [AWS](#aws), [Heroku](#heroku), and [local](#local). 28 | 29 | We provide a [Terraform](https://www.terraform.io/) stack that can be easily used to deploy the MLflow Tracking Server. 30 | 31 | > **NOTE**: This project is not intended to be used for production deployments. It is intended to be used for testing and development. 
32 | 33 | ## Environment Variables 34 | 35 | The environment variables below are required to deploy this project. 36 | 37 | | Variable | Description | Default | 38 | | - | - | - | 39 | | PORT | Port for the MLflow server | `80` | 40 | | MLFLOW_ARTIFACT_URI | S3 Bucket URI for MLflow's artifact store | `"./mlruns"` 41 | | MLFLOW_BACKEND_URI | [SQLAlchemy database uri](https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls) (if provided, the other variables `MLFLOW_DB_*` are ignored) | | 42 | | DATABASE_URL | [SQLAlchemy database uri](https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls), it's used by Heroku deployment. Basically, we will move it to `MLFLOW_BACKEND_URI`. | | 43 | | MLFLOW_DB_DIALECT | Database dialect (e.g. postgresql, mysql+pymysql, sqlite) | `"postgresql"` | 44 | | MLFLOW_DB_USERNAME | Backend store username | `"mlflow"` | 45 | | MLFLOW_DB_PASSWORD | Backend store password | `"mlflow"` | 46 | | MLFLOW_DB_HOST | Backend store host | | 47 | | MLFLOW_DB_PORT | Backend store port | `3306` | 48 | | MLFLOW_DB_DATABASE | Backend store database | `"mlflow"` | 49 | | MLFLOW_TRACKING_USERNAME | Username for MLflow UI and API | `"mlflow"` | 50 | | MLFLOW_TRACKING_PASSWORD | Password for MLflow UI and API | `"mlflow"` | 51 | 52 | ## Deploying MLflow Tracking Server 53 | 54 | ### AWS 55 | 56 | ![](docs/images/architecture_mlflow.png) 57 | 58 |
<details><summary>Amazon ECR</summary> 59 |
<br>
60 | 61 | [Amazon Elastic Container Registry (ECR)](https://aws.amazon.com/ecr/) is a fully managed container registry that makes it easy to store, manage, share, and deploy your container images and artifacts anywhere. 62 | 63 |
<br>
64 | </details>
65 | 66 |
<details><summary>App Runner</summary> 67 |
<br>
68 | 69 | [AWS App Runner](https://aws.amazon.com/apprunner/) is a fully managed service that makes it easy for developers to quickly deploy containerized web applications and APIs, at scale and with no prior infrastructure experience required. Start with your source code or a container image. 70 | 71 |
<br>
72 | </details>
73 | 74 |
<details><summary>Amazon S3</summary> 75 |
<br>
76 | 77 | [Amazon Simple Storage Service (Amazon S3)](https://aws.amazon.com/s3/) is an object storage service that offers industry-leading scalability, data availability, security, and performance. 78 | 79 |
<br>
80 | </details>
81 | 82 |
<details><summary>Amazon Aurora Serverless</summary> 83 |
<br>
84 | 85 | [Amazon Aurora Serverless](https://aws.amazon.com/rds/aurora/serverless/) is an on-demand, auto-scaling configuration for Amazon Aurora. It automatically starts up, shuts down, and scales capacity up or down based on your application's needs. You can run your database on AWS without managing database capacity. 86 | 87 |
<br>
88 | </details>
89 | 90 | #### Prerequisites 91 | 92 | - [AWS Account](https://console.aws.amazon.com/console/) 93 | - [AWS CLI](https://aws.amazon.com/cli/) 94 | - [Terraform CLI](https://www.terraform.io/downloads.html) 95 | 96 | To deploy MLflow, you'll need to: 97 | 98 | 1. [Create an AWS account](https://aws.amazon.com/free/) if you don't already have one. 99 | 100 | 2. Configure AWS CLI to use your AWS account. 101 | 102 | 3. Clone this repository. 103 | 104 | ```bash 105 | git clone https://github.com/DougTrajano/mlflow-server.git 106 | ``` 107 | 108 | 4. Open `mlflow-server/terraform` folder. 109 | 110 | ```bash 111 | cd mlflow-server/terraform 112 | ``` 113 | 114 | 5. Run the following command to create all the required resources: 115 | 116 | ```bash 117 | terraform init 118 | terraform apply -var mlflow_username="YOUR-USERNAME" -var mlflow_password="YOUR-PASSWORD" 119 | ``` 120 | 121 | Multiple usernames and passwords can also be specified in a comma-delimited string: 122 | 123 | ```bash 124 | terraform apply -var mlflow_username="USERNAME1,USERNAME2,USERNAME3" -var mlflow_password="PASSWORD1,PASSWORD2,PASSWORD3" 125 | ``` 126 | 127 | See a full list of variables that can be used in [terraform/variables.tf](terraform/variables.tf). 128 | 129 | 6. Type "yes" when prompted to continue. 130 | 131 | ```log 132 | Plan: 21 to add, 0 to change, 0 to destroy. 133 | 134 | Changes to Outputs: 135 | + artifact_bucket_id = (known after apply) 136 | + mlflow_password = (sensitive value) 137 | + mlflow_username = "doug" 138 | + service_url = (known after apply) 139 | + status = (known after apply) 140 | 141 | Do you want to perform these actions? 142 | Terraform will perform the actions described above. 143 | Only 'yes' will be accepted to approve. 144 | 145 | Enter a value: yes 146 | ``` 147 | 148 | This will create the following resources: 149 | 150 | - An [S3 bucket](https://aws.amazon.com/s3/) is used to store MLflow artifacts. 
151 | - An [IAM role and [policy](https://aws.amazon.com/iam/) that allows MLflow to access the S3 bucket. 152 | - An [Aurora RDS Serverless](https://aws.amazon.com/rds/aurora/serverless/) database (PostgreSQL) is used to store MLflow data. 153 | - An [App Runner](https://aws.amazon.com/apprunner/) that will run the MLflow Tracking Server. 154 | - (Optional) A set of network resources such as [VPC](https://aws.amazon.com/vpc/), [Subnet](https://aws.amazon.com/ec2/subnets/), and [Security group](https://aws.amazon.com/ec2/security-groups/). 155 | 156 | ### Heroku 157 | 158 | #### Prerequisites 159 | 160 | - [Heroku Account](https://dashboard.heroku.com/) 161 | - [AWS Account](https://console.aws.amazon.com/console/) 162 | - The [Heroku](#heroku) deployment will use an Amazon S3 bucket for storing the MLflow tracking data. 163 | - [AWS CLI](https://aws.amazon.com/cli/) 164 | - [Terraform CLI](https://www.terraform.io/downloads.html) 165 | 166 | 1. [Create an AWS account](https://aws.amazon.com/free/) if you don't already have one. 167 | 168 | 2. Configure AWS CLI to use your AWS account. 169 | 170 | 3. Clone this repository. 171 | 172 | ```bash 173 | git clone https://github.com/DougTrajano/mlflow-server.git 174 | ``` 175 | 176 | 4. Open `mlflow-server/terraform` folder. 177 | 178 | ```bash 179 | cd mlflow-server/terraform 180 | ``` 181 | 182 | 5. Run the following command to create only the S3 bucket 183 | 184 | ```bash 185 | terraform init 186 | terraform apply -var environment="heroku" -target="module.s3" 187 | ``` 188 | 189 | 6. Type "yes" when prompted to continue. 190 | 191 | ```log 192 | Plan: 5 to add, 0 to change, 0 to destroy. 193 | 194 | Changes to Outputs: 195 | + artifact_bucket_id = (known after apply) 196 | 197 | Do you want to perform these actions? 198 | Terraform will perform the actions described above. 199 | Only 'yes' will be accepted to approve. 200 | 201 | Enter a value: yes 202 | ``` 203 | 204 | 2. 
Create an IAM Policy for the S3 bucket as follows: 205 | 206 |
<details><summary>IAM Policy example</summary> 207 |
<br>
208 | 209 | ```json 210 | { 211 | "Version": "2012-10-17", 212 | "Statement": [ 213 | { 214 | "Effect": "Allow", 215 | "Action": [ 216 | "s3:ListBucket" 217 | ], 218 | "Resource": "arn:aws:s3:::mlflow-heroku-20220723133820303500000001" 219 | }, 220 | { 221 | "Effect": "Allow", 222 | "Action": [ 223 | "s3:*", 224 | "s3-object-lambda:*" 225 | ], 226 | "Resource": "arn:aws:s3:::mlflow-heroku-20220723133820303500000001/*" 227 | } 228 | ] 229 | } 230 | ``` 231 | 232 |
<br>
233 | </details>
234 | 235 | 3. Create an IAM User and attach the IAM Policy previously created. 236 | 237 | > Take note of the IAM User access key and secret key, you'll need them in the step 5. 238 | 239 | 4. Click on the "Deploy to Heroku" button below. 240 | 241 | [![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/DougTrajano/mlflow-server/tree/main) 242 | 243 | 5. Follow the instructions on the new page to create an MLflow Tracking Server. 244 | 245 | ### Local 246 | 247 | #### Prerequisites 248 | 249 | - [Docker](https://www.docker.com/) and [Docker Compose](https://docs.docker.com/compose/install/). 250 | 251 | 1. Clone this repository. 252 | 253 | ```bash 254 | git clone https://github.com/DougTrajano/mlflow-server.git 255 | ``` 256 | 257 | 2. Open the `mlflow-server` folder. 258 | 259 | ```bash 260 | cd mlflow-server 261 | ``` 262 | 263 | 3. Run the following command to create all the required resources: 264 | 265 | ```bash 266 | docker-compose up -d --build 267 | ``` 268 | 269 | ## Using your deployed MLflow 270 | 271 | The link that you will use to access the MLflow Tracking Server will depend on the deployment method you choose. 272 | 273 | - For [AWS](#aws), the link will be something like `https://XXXXXXXXX.aws-region.awsapprunner.com/`. 274 | - You can find it in the [AWS App Runner console](https://us-east-1.console.aws.amazon.com/apprunner/home). 275 | - For [Heroku](#heroku), the link will be something like `https://XXXXXXXXX.herokuapp.com/`. 276 | - You can find it in the [Heroku dashboard](https://dashboard.heroku.com/apps/). 277 | - For [Local](#local), the link will be something like `http://localhost:80/`. 278 | 279 | ![](docs/images/mlflow_ui.png) 280 | 281 | Also, you can track your experiments using MLflow API. 
282 | 283 | ```python 284 | import os 285 | import mlflow 286 | 287 | os.environ["MLFLOW_TRACKING_URI"] = "<>" 288 | os.environ["MLFLOW_EXPERIMENT_NAME"] = "<>" 289 | os.environ["MLFLOW_TRACKING_USERNAME"] = "<>" 290 | os.environ["MLFLOW_TRACKING_PASSWORD"] = "<>" 291 | 292 | # AWS AK/SK are required to upload artifacts to S3 Bucket 293 | os.environ["AWS_ACCESS_KEY_ID"] = "<>" 294 | os.environ["AWS_SECRET_ACCESS_KEY"] = "<>" 295 | 296 | SEED = 1993 297 | 298 | mlflow.start_run() 299 | mlflow.log_param("seed", SEED) 300 | mlflow.end_run() 301 | ``` 302 | 303 | ## References 304 | 305 | - [Managing your machine learning lifecycle with MLflow and Amazon SageMaker | AWS Machine Learning Blog](https://aws.amazon.com/pt/blogs/machine-learning/managing-your-machine-learning-lifecycle-with-mlflow-and-amazon-sagemaker/) 306 | - [Introducing AWS App Runner](https://aws.amazon.com/pt/blogs/containers/introducing-aws-app-runner/) 307 | - [MLflow Documentation](https://www.mlflow.org/docs/latest/index.html) (current version: 1.27.0) 308 | - [soundsensing/mlflow-easyauth: Deploy MLflow with HTTP basic authentication using Docker](https://github.com/soundsensing/mlflow-easyauth) 309 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mlflow-server", 3 | "description": "MLflow Tracking Server with basic authentication (user/password).", 4 | "logo": "https://avatars.githubusercontent.com/u/39938107?v=4", 5 | "keywords": [ 6 | "machine learning", 7 | "data science", 8 | "machinelearning", 9 | "mlflow", 10 | "experiment tracking" 11 | ], 12 | "repository": "https://github.com/DougTrajano/mlflow-server", 13 | "stack": "container", 14 | "success_url": "/", 15 | "env": { 16 | "MLFLOW_TRACKING_USERNAME": { 17 | "description": "Username for MLflow UI and API.", 18 | "value": "mlflow" 19 | }, 20 | "MLFLOW_TRACKING_PASSWORD": { 21 | "description": 
"Password for MLflow UI and API.", 22 | "value": "mlflow" 23 | }, 24 | "MLFLOW_ARTIFACT_URI": { 25 | "description": "URI for mlflow artifacts", 26 | "value": "./mlruns" 27 | }, 28 | "AWS_ACCESS_KEY_ID": { 29 | "description": "AWS access key ID", 30 | "required": false 31 | }, 32 | "AWS_SECRET_ACCESS_KEY": { 33 | "description": "AWS secret access key", 34 | "required": false 35 | } 36 | }, 37 | "addons": [ 38 | { 39 | "plan": "heroku-postgresql" 40 | } 41 | ] 42 | } -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | postgres: 5 | image: postgres:15.3-alpine3.18 6 | restart: unless-stopped 7 | ports: 8 | - ${POSTGRES_PORT-5432}:${POSTGRES_PORT-5432} 9 | volumes: 10 | - postgres-db-volume:/var/lib/postgresql/data 11 | environment: 12 | POSTGRES_DB: ${POSTGRES_DB_NAME-postgres} 13 | POSTGRES_USER: ${POSTGRES_USER-postgres} 14 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD-postgres} 15 | healthcheck: 16 | test: ["CMD", "pg_isready", "-U", "postgres"] 17 | interval: 5s 18 | retries: 5 19 | 20 | mlflow: 21 | container_name: mlflow 22 | build: 23 | context: . 
24 | ports: 25 | - ${MLFLOW_PORT-80}:${MLFLOW_PORT-80} 26 | depends_on: 27 | - postgres 28 | environment: 29 | PORT: ${MLFLOW_PORT-80} 30 | AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID} 31 | AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY} 32 | MLFLOW_ARTIFACT_URI: ${MLFLOW_ARTIFACT_URI-./mlruns} 33 | MLFLOW_DB_DATABASE: ${POSTGRES_DB_NAME-postgres} 34 | MLFLOW_DB_DIALECT: postgresql 35 | MLFLOW_DB_HOST: postgres 36 | MLFLOW_DB_PASSWORD: ${POSTGRES_PASSWORD-postgres} 37 | MLFLOW_DB_PORT: ${POSTGRES_PORT-5432} 38 | MLFLOW_DB_USERNAME: ${POSTGRES_USER-postgres} 39 | MLFLOW_TRACKING_USERNAME: ${MLFLOW_TRACKING_USERNAME-mlflow,user} 40 | MLFLOW_TRACKING_PASSWORD: ${MLFLOW_TRACKING_PASSWORD-mlflow,pass} 41 | MLFLOW_SQLALCHEMYSTORE_POOLCLASS: NullPool 42 | 43 | volumes: 44 | postgres-db-volume: -------------------------------------------------------------------------------- /docs/diagrams.drawio: -------------------------------------------------------------------------------- 1 | 7Vvbcts2EP0azbQP1pDgTXy0Lk6TOK1rdZL0yUOREMWEIjQgZEn5+i5IQLyAtC6WbKWx5EyIBbAAd89ZAAu7Ywzm63fUW8w+kQDHHaQF644x7CCkmwh1+I8WbHJJz7ZyQUijIBdphWAc/cCip5QuowCnQpaLGCExixZVoU+SBPusIvMoJatqsymJg4pg4YVYEYx9L1alX6KAzYRUt92i4g8chTMxdA85ecXE87+HlCwTMV4HGfoAITTKq+ee1CVeNJ15AVmVRMaoYwwoISx/mq8HOOa2rZrtpqV2O2+KE7ZPB81YJfPlWP9yc+Nd3b/7tnL/fLgyermaRy9eYvke2WzZRlqIv+RCHU1M4BFThtdNrvImUkPxugAjTOaY0Q20k70sYSGBIDB+Xl4V/ug5os2s5ApTswUOBAbCre7CDvAgTHGAWUzFKtdfxiAYxGQZKAYiSxZHCR5sEap1jP6MzWHIoQ6Pq1nE8Hjh+bz9CmgEsilJmCCDjmRZqOTdAS0L/jxfh5x3XW+Vmt3cE0Y/pO/Bxo21D/D44GfTBCWMku8wr5jQbKZG4LgTjaufRnFckk+n2PZ9kHNvRkCO6zgK+QCM8PE8UYrxlHG18CpREt5mpaHB9QVeOsOBmHsDMp/EXh1BrUgxJApKyLC0RmRoZ0KGbqnQWAA7tPsleJ/Cw+e7wWUj5HHhN2GjhyaGbavYCCzcC8zLx4ZbiyKaihXTbsCK3jsXVpAClTFZUvAxBBJYRxWUVAy1CzLS7j4YEnBn9GNvguM7kkYsIpUK6bfbWoMJYYzMWx1bx9cKT2ISkrQbRmy2nDQEEYt/m6BlZx+B4ZJ8ZPPvOVGhu04FFT0VFE5PxYSUnT586Aomhh7zQDL2I3j5KGXpftEDnBLwHtKcCUlwk+3he8MnqIZ8/qnT80isKGAsAXVXHPPSRf5W02iNg7bARnGacScPa30oNgW4ZYpp+qI4k9HGUvcsLwssNdj8Q70ogVgM0g9kosKqbucUfJh2U45DH3eTpR
9jjz6gqjcXJOIuHj2CBVPp4YPjwKlg9uJ+1pH7yn5u2JpW9h92zJfhCX8K+dNvn26nMZw7wPWYwov+ruAgc2k2T6sPPxAhB/k/C2oHXNJFVoOwSeaoQl1tBv/pTSPUhU0yRxXqajNekrOuCptkjqXOuN5bb+it13rDT8vCXQPqNijXozjU3Ti9kWaW6oYRBUU5QRJCOezqlBtq1kB3mij3vwzzU4+GHsPn3DhsjxWXEuCRGuAVJuMgxNLmHCiwXUu8eFRIyw7DSXDN8yfc7THxv3NRPMnK0vsZoDzKZDuxx4CeN1Es9bRH+Sw5ssVc2ypxoAtzfOyxEsLEQ/yURhlKudWehETJ5bqEBcWxx6LHahKpyelC3R0PsgW8kGNV4eXWDiH5a4pe5axOTZGxS1FuBkURuNTblJqJRaB1wrphV8bR7d6T86q3l/MqKJDPoCDE1uDHc8S4AI7IdpmwRJMdpLFMq2+bKmlaNjjAFbr5KhplhX95oWvJ4nBdrhxuyqU7TCMwNY/gx5zJdzNQ5nEvmoG1AG+YNRV7M3CXohYGngz06o7wZKCXAf9IzD8b5euIfZWzgucSxqFUQJwXKgi/VGbI08NFM2ObEJOAllchhzID7VJ0prXJcPULXJuc3TRtuWk5bEu7//2LNJe135nWdNqR9LytrbvbNOpVRvbJzxH5vVoRceRFnl6JNFnWdJKSeMnwNfXlYYVLtyX0RDRSTj9gZ+ZFycHRwmnxXHvWwdnLQca5EtlScTnpcPde8VHliFkFYcMBwDYdu9d4ZjV0E1aKhiNi6C9Qd4bX4nS4kJccAX7EMVlg+rCA1S1bn8ALTHi3nhinucVaD8HzKAiyNbF2tVE/E2/bVe885IJWPgyrh9+8i2iezbBPaIBpLZ2b12xvmmu1pUVbKOyLA/zwymjIt5tD/j0LUuWxuRJYdbdrKchFva4MVGXwyq77r1PVsa6KwaQKMp2m+Ll7rvDxY4jwt8/o88ePg7//0uiHd9aVmsV/u/XbfevXsFTtucztfetnIgVvzZd+ptsOt2fFyob74bn3A7yCtNHgXsHIW/L1Lfn6cyVfsU/PmXg1tWpcf+0bW3XbvuXz2Hij82XT2dauDcM5jM7IcXTd/mXonBrnZLNjaxfFZvWkuWXz/XD8RufLprM5dKDyMDrDx+y7vwydaZCek8+2zDZfCJ/RGZPfZ77xabkmPSrnDYV66vqkefAXu7sV7tyZHxe7stdIj9t6dUkz62m3fdPjzi5FJ0qPO+jJdPfO9rru1nh6hvS4zOb8tL/ecCrevhTVrD2Z9qp3tLXszpV1JNXqaSJF0YmoZhnuWanTmIw8hihV7BZtbgnfzWQg/oYZ24hti7dkpE6vF1he2rnXiMlXWYncZjbsjfznbX/Ui5mfK2q+VLRDe3rTupxgp6MTBTtF0dG/kALF4u8V8+bFH4Uao/8A -------------------------------------------------------------------------------- /docs/images/architecture_mlflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DougTrajano/mlflow-server/23a84b833cc305c4f8a81012ef406ed83d67eb86/docs/images/architecture_mlflow.png -------------------------------------------------------------------------------- /docs/images/mlflow_ui.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DougTrajano/mlflow-server/23a84b833cc305c4f8a81012ef406ed83d67eb86/docs/images/mlflow_ui.png -------------------------------------------------------------------------------- /heroku.yml: -------------------------------------------------------------------------------- 1 | build: 2 | docker: 3 | web: Dockerfile 4 | 5 | setup: 6 | addons: 7 | - plan: heroku-postgresql 8 | as: DATABASE -------------------------------------------------------------------------------- /nginx.conf: -------------------------------------------------------------------------------- 1 | #user www; 2 | #worker_processes 1; 3 | #pid /var/run/nginx.pid; 4 | 5 | events { 6 | multi_accept on; 7 | } 8 | 9 | http { 10 | 11 | server { 12 | listen 0.0.0.0:${PORT}; 13 | server_name mlflow; 14 | 15 | location = /health { 16 | auth_basic "off"; 17 | proxy_pass http://0.0.0.0:5001; 18 | proxy_set_header Host "mlflow"; 19 | } 20 | 21 | # Forward to MLFlow 22 | location / { 23 | # HTTP Basic auth 24 | auth_basic "Login required"; 25 | auth_basic_user_file /etc/nginx/.htpasswd; 26 | 27 | proxy_pass http://0.0.0.0:5001; 28 | proxy_set_header Host "mlflow"; 29 | } 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow==2.4.1 2 | pymysql==1.0.3 3 | psycopg2-binary==2.9.6 4 | protobuf==4.23.3 5 | gevent==22.10.2 6 | boto3==1.26.155 7 | -------------------------------------------------------------------------------- /scripts/mlflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | if [[ "${PORT}" == 5001 ]]; then 3 | echo "PORT can not be set to the value of 5001. Please, select other port." 4 | exit 1 5 | fi 6 | 7 | if [[ -z "${MLFLOW_ARTIFACT_URI}" ]]; then 8 | echo "MLFLOW_ARTIFACT_URI can not be set. 
Define default value as ./mlruns" 9 | export MLFLOW_ARTIFACT_URI="./mlruns" 10 | fi 11 | 12 | if [[ -n "${DATABASE_URL}" ]]; then 13 | # Heroku uses "postgres" dialect, but we want to use "postgresql" 14 | # so we will update MLFLOW_BACKEND_URI to use "postgresql" dialect. 15 | # https://www.mlflow.org/docs/latest/tracking.html#where-runs-are-recorded 16 | export MLFLOW_BACKEND_URI="${DATABASE_URL/postgres/postgresql}" 17 | unset DATABASE_URL 18 | fi 19 | 20 | if [[ -z "${MLFLOW_BACKEND_URI}" ]]; then 21 | echo "MLFLOW_BACKEND_URI not set. Define default value based on other variables." 22 | 23 | if [[ -z "${MLFLOW_DB_DIALECT}" ]]; then 24 | export MLFLOW_DB_DIALECT="postgresql" 25 | fi 26 | 27 | if [[ -z "${MLFLOW_DB_USERNAME}" ]]; then 28 | export MLFLOW_DB_USERNAME="mlflow" 29 | fi 30 | 31 | if [[ -z "${MLFLOW_DB_PASSWORD}" ]]; then 32 | export MLFLOW_DB_PASSWORD="mlflow" 33 | fi 34 | 35 | if [[ -z "${MLFLOW_DB_DATABASE}" ]]; then 36 | export MLFLOW_DB_DATABASE="mlflow" 37 | fi 38 | 39 | if [[ -z "${MLFLOW_DB_PORT}" ]]; then 40 | export MLFLOW_DB_PORT=3306 41 | fi 42 | 43 | export MLFLOW_BACKEND_URI=${MLFLOW_DB_DIALECT}://${MLFLOW_DB_USERNAME}:${MLFLOW_DB_PASSWORD}@${MLFLOW_DB_HOST}:${MLFLOW_DB_PORT}/${MLFLOW_DB_DATABASE} 44 | unset MLFLOW_DB_DIALECT 45 | unset MLFLOW_DB_USERNAME 46 | unset MLFLOW_DB_PASSWORD 47 | unset MLFLOW_DB_DATABASE 48 | unset MLFLOW_DB_HOST 49 | unset MLFLOW_DB_PORT 50 | fi 51 | 52 | echo "Upgrading database..." 
53 | mlflow db upgrade ${MLFLOW_BACKEND_URI} 54 | 55 | echo "Starting mlflow server" 56 | 57 | exec mlflow server --host 0.0.0.0 --port 5001 \ 58 | --default-artifact-root "${MLFLOW_ARTIFACT_URI}" \ 59 | --backend-store-uri "${MLFLOW_BACKEND_URI}" \ 60 | --serve-artifacts \ 61 | --gunicorn-opts "--worker-class gevent --threads 2 --workers 2 --timeout 300 --keep-alive 300" -------------------------------------------------------------------------------- /scripts/nginx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | if [[ -z "${MLFLOW_TRACKING_USERNAME}" ]]; then 3 | export MLFLOW_TRACKING_USERNAME="mlflow" 4 | fi 5 | 6 | if [[ -z "${MLFLOW_TRACKING_PASSWORD}" ]]; then 7 | export MLFLOW_TRACKING_PASSWORD="mlflow" 8 | fi 9 | 10 | if [[ -z "${PORT}" ]]; then 11 | export PORT=80 12 | fi 13 | 14 | echo "Replacing variables in nginx.conf" 15 | envsubst '${PORT}' < /app/nginx.conf > /etc/nginx/nginx.conf 16 | 17 | echo "Adding basic auth to nginx" 18 | mkdir -p /etc/nginx 19 | touch /etc/nginx/.htpasswd 20 | 21 | # Replace commas with newlines 22 | string1=$(echo ${MLFLOW_TRACKING_USERNAME} | tr ',' '\n') 23 | string2=$(echo ${MLFLOW_TRACKING_PASSWORD} | tr ',' '\n') 24 | 25 | # Loop over both strings in parallel 26 | while read val1 && read val2 <&3; do 27 | htpasswd -b /etc/nginx/.htpasswd ${val1} ${val2} 28 | done <<< "$string1" 3<<< "$string2" 29 | 30 | # htpasswd -b -c /etc/nginx/.htpasswd ${MLFLOW_TRACKING_USERNAME} ${MLFLOW_TRACKING_PASSWORD} 31 | 32 | echo "Starting nginx" 33 | exec nginx -g "daemon off;" 34 | -------------------------------------------------------------------------------- /supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | 4 | [program:mlflow] 5 | command=/app/scripts/mlflow.sh 6 | stdout_logfile=/dev/stdout 7 | stdout_logfile_maxbytes=0 8 | stderr_logfile=/dev/stderr 9 | stderr_logfile_maxbytes=0 10 | 
autorestart=true 11 | 12 | [program:nginx] 13 | command=/app/scripts/nginx.sh 14 | stdout_logfile=/dev/stdout 15 | stdout_logfile_maxbytes=0 16 | stderr_logfile=/dev/stderr 17 | stderr_logfile_maxbytes=0 18 | startretries=10 19 | autorestart=true -------------------------------------------------------------------------------- /terraform/README.md: -------------------------------------------------------------------------------- 1 | # Terraform stack 2 | 3 | > Terraform v1.0.1 on linux_amd64 4 | 5 | This directory contains the Terraform configuration for the MLflow Server stack. 6 | 7 | ## Terraform configuration 8 | 9 | The Terraform configuration is based on the [Terraform documentation](https://www.terraform.io/docs/index.html) and the [AWS provider](https://www.terraform.io/docs/providers/aws/index.html). 10 | 11 | The Terraform configuration is split into the following files: 12 | 13 | - `iam.tf`: AWS IAM configuration. 14 | - `locals.tf`: The Terraform locals file. 15 | - `network.tf`: The network configuration. 16 | - `outputs.tf`: The outputs configuration. 17 | - `providers.tf`: The Terraform providers file. 18 | - `rds.tf`: AWS RDS database configuration (Backend store). 19 | - `s3.tf`: AWS S3 configuration (Artifact store). 20 | - `server.tf`: AWS App Runner configuration (MLflow Server). 21 | - `variables.tf`: The Terraform variables file. 22 | 23 | ## Terraform variables 24 | 25 | The Terraform variables file contains the following variables: 26 | 27 | - `name` - (Optional) The name of the stack. Defaults to `mlflow`. 28 | - `environment` - (Optional) The environment of the stack. Defaults to `dev`. 29 | - `aws_region` - (Optional) The AWS region. Defaults to `us-east-1`. 30 | - `tags` - (Optional) The tags to apply to the stack. Defaults to `{}`. 31 | - `vpc_id` - (Optional) The VPC ID. Defaults to `null` which means that the VPC, subnets, security groups will be created. 32 | - `vpc_security_group_ids` - (Optional) The VPC security group IDs. 
Defaults to `null`, it will be used only if vpc_id is set. 33 | - `service_cpu` - (Optional) The number of CPU cores to allocate to the MLflow Server. Defaults to `1024`. 34 | - `service_memory` - (Optional) The amount of memory to allocate to the MLflow Server. Defaults to `2048`. 35 | - `mlflow_username` - (Optional) The username to use for the MLflow Server. Defaults to `mlflow`. 36 | - `mlflow_password` - (Optional) The password to use for the MLflow Server. Defaults to `mlflow`. 37 | - `artifact_bucket_id` - (Optional) The S3 bucket ID to use for the MLflow Server artifact store. If specified, MLflow will use this bucket to store artifacts. Otherwise, this module will create a dedicated bucket. 38 | - `db_skip_final_snapshot` - (Optional) Whether to skip creating a final DB snapshot. Default is `false`. 39 | - `db_deletion_protection` - (Optional) Whether to enable deletion protection on the DB instance. Default is `true`. 40 | - `db_instance_class` - (Optional) The DB instance class to use. Defaults to `db.t2.micro`. 41 | - `db_subnet_ids` - (Optional) The DB subnet IDs. Defaults to `null`, it will be used only if vpc_id is set. 42 | - `db_auto_pause` - (Optional) Whether to automatically pause the DB instance when it's not in use. Defaults to `true`. 43 | - `db_auto_pause_seconds` - (Optional) The number of seconds to wait before pausing the DB instance. Defaults to `1800`. 44 | - `db_min_capacity` - (Optional) The minimum capacity of the DB instance. Defaults to `2`. 45 | - `db_max_capacity` - (Optional) The maximum capacity of the DB instance. Defaults to `64`. 46 | 47 | ## Terraform providers 48 | 49 | The Terraform providers file contains the following providers: 50 | 51 | - `aws`: The AWS provider. 52 | 53 | ## Terraform locals 54 | 55 | The Terraform locals file contains the following locals: 56 | 57 | - `name` - The name of the stack. (e.g. `{name}-{environment}`) 58 | - `availability_zones` - The availability zones for the region. 
59 | - `db_username` - The username to use for the MLflow Server database. 60 | - `db_password` - The password to use for the MLflow Server database. 61 | - `db_port` - The port to use for the MLflow Server database. 62 | - `db_database` - The database name to use for the MLflow Server database. 63 | - `db_subnet_ids` - The DB subnet IDs. 64 | - `create_dedicated_vpc` - Whether to create a dedicated VPC. 65 | - `vpc_id` - The VPC ID. 66 | - `create_dedicated_bucket` - Whether to create a dedicated S3 bucket for the MLflow Server artifact store. 67 | - `artifact_bucket_id` - The S3 bucket ID to use for the MLflow Server artifact store. 68 | - `app_port` - The port to use for the MLflow Server. 69 | - `create_mlflow_password` - Whether to create a password for the MLflow Server. 70 | - `mlflow_password` - The password to use for the MLflow Server. 71 | - `tags` - The tags to apply to the stack (Add `Name` and `Environment` tags). 72 | 73 | ## Terraform outputs 74 | 75 | The Terraform outputs file contains the following outputs: 76 | 77 | - `artifact_bucket_id` - The S3 bucket ID to use for the MLflow Server artifact store. 78 | - `service_url` - The URL to the MLflow Server. 79 | - `mlflow_username` - The username to use for the MLflow Server. 80 | - `mlflow_password` - The password to use for the MLflow Server. 81 | - `status` - The status of the MLflow Server service. 
-------------------------------------------------------------------------------- /terraform/iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "mlflow_iam_role" { 2 | name = "${local.name}-role" 3 | 4 | assume_role_policy = jsonencode({ 5 | Version = "2012-10-17" 6 | Statement = [ 7 | { 8 | Action = "sts:AssumeRole" 9 | Principal = { 10 | Service = "build.apprunner.amazonaws.com" 11 | } 12 | Effect = "Allow" 13 | }, 14 | { 15 | Action = "sts:AssumeRole" 16 | Principal = { 17 | Service = "tasks.apprunner.amazonaws.com" 18 | } 19 | Effect = "Allow" 20 | } 21 | ] 22 | }) 23 | 24 | tags = merge( 25 | { 26 | Name = "${local.name}-role" 27 | }, 28 | local.tags 29 | ) 30 | } 31 | 32 | resource "aws_iam_role_policy" "mlflow_bucket_policy" { 33 | count = local.create_dedicated_bucket ? 1 : 0 34 | name_prefix = "access_to_mlflow_bucket" 35 | role = aws_iam_role.mlflow_iam_role.id 36 | 37 | policy = jsonencode({ 38 | Version = "2012-10-17" 39 | Statement = [ 40 | { 41 | Effect = "Allow" 42 | Action = [ 43 | "s3:ListBucket", 44 | "s3:HeadBucket", 45 | ] 46 | Resource = concat( 47 | module.s3.artifact_bucket_arn, 48 | ) 49 | }, 50 | { 51 | Effect = "Allow" 52 | Action = [ 53 | "s3:ListBucketMultipartUploads", 54 | "s3:GetBucketTagging", 55 | "s3:GetObjectVersionTagging", 56 | "s3:ReplicateTags", 57 | "s3:PutObjectVersionTagging", 58 | "s3:ListMultipartUploadParts", 59 | "s3:PutObject", 60 | "s3:GetObject", 61 | "s3:GetObjectAcl", 62 | "s3:GetObject", 63 | "s3:AbortMultipartUpload", 64 | "s3:PutBucketTagging", 65 | "s3:GetObjectVersionAcl", 66 | "s3:GetObjectTagging", 67 | "s3:PutObjectTagging", 68 | "s3:GetObjectVersion", 69 | ] 70 | Resource = [ 71 | for bucket in concat(module.s3.artifact_bucket_arn) : 72 | "${bucket}/*" 73 | ] 74 | }, 75 | ] 76 | }) 77 | } -------------------------------------------------------------------------------- /terraform/locals.tf: 
-------------------------------------------------------------------------------- 1 | data "aws_availability_zones" "available" { 2 | state = "available" 3 | } 4 | 5 | locals { 6 | # General 7 | name = "${var.name}-${var.environment}" 8 | 9 | availability_zones = slice(data.aws_availability_zones.available.names, 0, 3) 10 | 11 | # RDS database 12 | db_username = "mlflow" 13 | db_database = "mlflow" 14 | db_port = 5432 15 | db_subnet_ids = local.create_dedicated_vpc ? aws_subnet.mlflow_public_subnet.*.id : var.db_subnet_ids 16 | 17 | # VPC and subnets 18 | create_dedicated_vpc = var.vpc_id == null 19 | vpc_id = local.create_dedicated_vpc ? aws_vpc.mlflow_vpc.0.id : var.vpc_id 20 | 21 | # S3 bucket 22 | create_dedicated_bucket = var.artifact_bucket_id == null 23 | artifact_bucket_id = local.create_dedicated_bucket ? module.s3.artifact_bucket_id : var.artifact_bucket_id 24 | 25 | # App Runner 26 | app_port = 8080 27 | create_mlflow_password = var.mlflow_password == null 28 | mlflow_password = local.create_mlflow_password ? random_password.mlflow_password.result : var.mlflow_password 29 | 30 | tags = merge( 31 | { 32 | "Environment" = "${var.environment}" 33 | }, 34 | var.tags 35 | ) 36 | } -------------------------------------------------------------------------------- /terraform/network.tf: -------------------------------------------------------------------------------- 1 | resource "aws_vpc" "mlflow_vpc" { 2 | count = local.create_dedicated_vpc ? 1 : 0 3 | cidr_block = "10.0.0.0/16" 4 | enable_dns_hostnames = true 5 | enable_dns_support = true 6 | 7 | tags = { 8 | Name = "${local.name}-vpc" 9 | } 10 | } 11 | 12 | resource "aws_subnet" "mlflow_public_subnet" { 13 | count = local.create_dedicated_vpc ? 
length(local.availability_zones) : 0 14 | vpc_id = local.vpc_id 15 | cidr_block = "10.0.${10+count.index}.0/24" 16 | availability_zone = "${data.aws_availability_zones.available.names[count.index]}" 17 | 18 | tags = { 19 | Name = "${local.name}-public-subnet" 20 | } 21 | } 22 | 23 | resource "aws_internet_gateway" "mlflow_gateway" { 24 | count = local.create_dedicated_vpc ? 1 : 0 25 | vpc_id = local.vpc_id 26 | 27 | tags = { 28 | Name = "${local.name}-igw" 29 | } 30 | } 31 | 32 | resource "aws_route_table" "mlflow_crt" { 33 | count = local.create_dedicated_vpc ? 1 : 0 34 | vpc_id = local.vpc_id 35 | 36 | route { 37 | cidr_block = "0.0.0.0/0" 38 | gateway_id = aws_internet_gateway.mlflow_gateway.0.id 39 | } 40 | 41 | tags = { 42 | Name = "${local.name}-crt" 43 | } 44 | } 45 | 46 | resource "aws_route_table_association" "mlflow_crt_association" { 47 | count = local.create_dedicated_vpc ? 1 : 0 48 | subnet_id = aws_subnet.mlflow_public_subnet.0.id 49 | route_table_id = aws_route_table.mlflow_crt.0.id 50 | } 51 | 52 | resource "aws_vpc_endpoint" "mlflow_endpoint" { 53 | count = local.create_dedicated_vpc ? 
1 : 0 54 | vpc_id = local.vpc_id 55 | 56 | service_name = "com.amazonaws.${var.aws_region}.s3" 57 | vpc_endpoint_type = "Gateway" 58 | route_table_ids = [aws_route_table.mlflow_crt.0.id] 59 | 60 | tags = { 61 | Name = "${local.name}-endpoint" 62 | } 63 | } -------------------------------------------------------------------------------- /terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "artifact_bucket_id" { 2 | value = local.artifact_bucket_id 3 | } 4 | 5 | output "service_url" { 6 | value = "https://${aws_apprunner_service.mlflow_server.service_url}" 7 | } 8 | 9 | output "mlflow_username" { 10 | value = var.mlflow_username 11 | } 12 | 13 | output "mlflow_password" { 14 | value = local.mlflow_password 15 | sensitive = true 16 | } 17 | 18 | output "status" { 19 | value = aws_apprunner_service.mlflow_server.status 20 | } -------------------------------------------------------------------------------- /terraform/providers.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | aws = { 4 | source = "hashicorp/aws" 5 | version = "~> 4.11.0" 6 | } 7 | 8 | } 9 | 10 | required_version = ">= 0.14.9" 11 | 12 | backend "s3" {} 13 | # backend "local" {} 14 | # The backend configuration will be added in the terraform init command. 
15 | ## terraform init -backend-config "profile=default" \ 16 | ## -backend-config "bucket=terraform-states" \ 17 | ## -backend-config "key=mlflow-server/terraform.tfstate" \ 18 | ## -backend-config "region=us-east-1" \ 19 | ## -backend-config "dynamodb_table=terraform-locks" \ 20 | ## -backend-config "encrypt=true" 21 | } 22 | 23 | provider "aws" { 24 | profile = var.aws_profile 25 | region = var.aws_region 26 | } -------------------------------------------------------------------------------- /terraform/rds.tf: -------------------------------------------------------------------------------- 1 | resource "random_password" "mlflow_backend_store" { 2 | length = 16 3 | special = true 4 | 5 | # Added this because random password was generating a password that had chars that 6 | # aurora didnt allow. With the lifecycle this shouldnt impact existing passwords that 7 | # happened to generate ok. 8 | override_special = "_+=()" 9 | lifecycle { 10 | ignore_changes = [override_special] 11 | } 12 | } 13 | 14 | resource "aws_db_subnet_group" "rds" { 15 | name = "${local.name}-rds-subnet-group" 16 | subnet_ids = local.db_subnet_ids 17 | } 18 | 19 | resource "aws_rds_cluster" "mlflow_backend_store" { 20 | cluster_identifier = "${local.name}-rds" 21 | engine = "aurora-postgresql" 22 | engine_mode = "serverless" 23 | port = local.db_port 24 | db_subnet_group_name = aws_db_subnet_group.rds.name 25 | vpc_security_group_ids = [aws_security_group.mlflow_server_sg.0.id] 26 | availability_zones = local.availability_zones 27 | database_name = local.db_database 28 | master_username = local.db_username 29 | master_password = random_password.mlflow_backend_store.result 30 | backup_retention_period = 5 31 | preferred_backup_window = "04:00-06:00" 32 | final_snapshot_identifier = "mlflow-db-backup" 33 | skip_final_snapshot = var.db_skip_final_snapshot 34 | deletion_protection = var.db_deletion_protection 35 | apply_immediately = true 36 | 37 | scaling_configuration { 38 | min_capacity = 
var.db_min_capacity 39 | max_capacity = var.db_max_capacity 40 | auto_pause = var.db_auto_pause 41 | seconds_until_auto_pause = var.db_auto_pause_seconds 42 | } 43 | 44 | tags = merge( 45 | { 46 | Name = "${local.name}-rds" 47 | }, 48 | local.tags 49 | ) 50 | } -------------------------------------------------------------------------------- /terraform/s3.tf: -------------------------------------------------------------------------------- 1 | module "s3" { 2 | source = "./s3" 3 | name = local.name 4 | tags = local.tags 5 | create_dedicated_bucket = local.create_dedicated_bucket 6 | } 7 | -------------------------------------------------------------------------------- /terraform/s3/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket_versioning" "versioning" { 2 | count = var.create_dedicated_bucket ? 1 : 0 3 | bucket = aws_s3_bucket.mlflow_artifact_store.0.id 4 | 5 | versioning_configuration { 6 | status = "Enabled" 7 | } 8 | } 9 | 10 | resource "aws_s3_bucket_intelligent_tiering_configuration" "bucket_lifecycle" { 11 | count = var.create_dedicated_bucket ? 1 : 0 12 | name = "${var.name}-bucket-lifecycle" 13 | bucket = aws_s3_bucket.mlflow_artifact_store.0.bucket 14 | 15 | tiering { 16 | access_tier = "DEEP_ARCHIVE_ACCESS" 17 | days = 180 18 | } 19 | tiering { 20 | access_tier = "ARCHIVE_ACCESS" 21 | days = 90 22 | } 23 | } 24 | 25 | resource "aws_s3_bucket_server_side_encryption_configuration" "bucket_encryption" { 26 | count = var.create_dedicated_bucket ? 1 : 0 27 | bucket = aws_s3_bucket.mlflow_artifact_store.0.bucket 28 | 29 | rule { 30 | apply_server_side_encryption_by_default { 31 | sse_algorithm = "AES256" 32 | } 33 | } 34 | } 35 | 36 | resource "aws_s3_bucket_acl" "bucket_acl" { 37 | count = var.create_dedicated_bucket ? 
1 : 0 38 | bucket = aws_s3_bucket.mlflow_artifact_store.0.id 39 | acl = "private" 40 | } 41 | 42 | resource "aws_s3_bucket" "mlflow_artifact_store" { 43 | count = var.create_dedicated_bucket ? 1 : 0 44 | bucket_prefix = "${var.name}-" 45 | force_destroy = true 46 | 47 | tags = merge( 48 | { 49 | Name = "${var.name}-bucket" 50 | }, 51 | var.tags 52 | ) 53 | } -------------------------------------------------------------------------------- /terraform/s3/outputs.tf: -------------------------------------------------------------------------------- 1 | output "artifact_bucket_id" { 2 | value = aws_s3_bucket.mlflow_artifact_store.0.id 3 | } 4 | 5 | output "artifact_bucket_arn" { 6 | value = aws_s3_bucket.mlflow_artifact_store.*.arn 7 | } -------------------------------------------------------------------------------- /terraform/s3/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | type = string 3 | description = "(Optional) A name for the application (e.g. mlflow)." 4 | default = "mlflow" 5 | } 6 | 7 | variable "tags" { 8 | type = map(string) 9 | description = "(Optional) AWS Tags common to all the resources created." 10 | default = {} 11 | } 12 | 13 | variable "create_dedicated_bucket" { 14 | type = bool 15 | description = "(Optional) Whether to create a dedicated S3 bucket for the application." 
16 | default = true 17 | } 18 | -------------------------------------------------------------------------------- /terraform/server.tf: -------------------------------------------------------------------------------- 1 | resource "random_password" "mlflow_password" { 2 | length = 16 3 | special = true 4 | override_special = "_%@" 5 | } 6 | 7 | resource "aws_apprunner_service" "mlflow_server" { 8 | service_name = "${local.name}" 9 | 10 | source_configuration { 11 | auto_deployments_enabled = false 12 | 13 | image_repository { 14 | image_identifier = "public.ecr.aws/t9j8s4z8/mlflow:${var.mlflow_version}" 15 | image_repository_type = "ECR_PUBLIC" 16 | 17 | image_configuration { 18 | port = local.app_port 19 | runtime_environment_variables = { 20 | "MLFLOW_ARTIFACT_URI" = "s3://${module.s3.artifact_bucket_id}" 21 | "MLFLOW_DB_DIALECT" = "postgresql" 22 | "MLFLOW_DB_USERNAME" = "${aws_rds_cluster.mlflow_backend_store.master_username}" 23 | "MLFLOW_DB_PASSWORD" = "${random_password.mlflow_backend_store.result}" 24 | "MLFLOW_DB_HOST" = "${aws_rds_cluster.mlflow_backend_store.endpoint}" 25 | "MLFLOW_DB_PORT" = "${aws_rds_cluster.mlflow_backend_store.port}" 26 | "MLFLOW_DB_DATABASE" = "${aws_rds_cluster.mlflow_backend_store.database_name}" 27 | "MLFLOW_TRACKING_USERNAME" = var.mlflow_username 28 | "MLFLOW_TRACKING_PASSWORD" = local.mlflow_password 29 | "MLFLOW_SQLALCHEMYSTORE_POOLCLASS" = "NullPool" 30 | } 31 | } 32 | } 33 | } 34 | 35 | instance_configuration { 36 | cpu = var.service_cpu 37 | memory = var.service_memory 38 | instance_role_arn = aws_iam_role.mlflow_iam_role.arn 39 | } 40 | 41 | network_configuration { 42 | egress_configuration { 43 | egress_type = "VPC" 44 | vpc_connector_arn = aws_apprunner_vpc_connector.connector.arn 45 | } 46 | } 47 | 48 | health_check_configuration { 49 | healthy_threshold = 1 50 | unhealthy_threshold = 5 51 | interval = 20 52 | timeout = 20 53 | path = "/health" 54 | protocol = "HTTP" 55 | } 56 | 57 | tags = merge( 58 | { 59 | Name = 
"${local.name}" 60 | }, 61 | local.tags 62 | ) 63 | } 64 | 65 | resource "aws_security_group" "mlflow_server_sg" { 66 | count = local.create_dedicated_vpc ? 1 : 0 67 | name = "${var.name}-server-sg" 68 | description = "Allow access to ${local.name}-rds from VPC Connector." 69 | vpc_id = local.vpc_id 70 | 71 | ingress { 72 | description = "Access to ${local.name}-rds from VPC Connector." 73 | from_port = local.db_port 74 | to_port = local.db_port 75 | protocol = "tcp" 76 | self = true 77 | } 78 | 79 | egress { 80 | from_port = 0 81 | to_port = 0 82 | protocol = "-1" 83 | cidr_blocks = ["0.0.0.0/0"] 84 | ipv6_cidr_blocks = ["::/0"] 85 | } 86 | 87 | tags = { 88 | Name = "${var.name}-server-sg" 89 | } 90 | } 91 | 92 | resource "aws_apprunner_vpc_connector" "connector" { 93 | vpc_connector_name = "${local.name}-connector" 94 | subnets = local.db_subnet_ids 95 | security_groups = local.create_dedicated_vpc ? [aws_security_group.mlflow_server_sg.0.id] : var.vpc_security_group_ids 96 | } 97 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "(Optional) A name for the application (e.g. mlflow)." 3 | type = string 4 | default = "mlflow" 5 | } 6 | 7 | variable "environment" { 8 | description = "(Optional) Environment. It will be part of the application name and a tag in AWS Tags." 9 | type = string 10 | default = "dev" 11 | } 12 | 13 | variable "aws_profile" { 14 | description = "(Optional) AWS profile to use. If not specified, the default profile will be used." 15 | type = string 16 | default = "default" 17 | } 18 | 19 | variable "aws_region" { 20 | description = "(Optional) AWS Region." 21 | type = string 22 | default = "us-east-1" 23 | } 24 | 25 | variable "tags" { 26 | type = map(string) 27 | description = "(Optional) AWS Tags common to all the resources created." 
28 | default = {} 29 | } 30 | 31 | variable "vpc_id" { 32 | type = string 33 | description = "(Optional) VPC ID." 34 | default = null 35 | } 36 | 37 | variable "vpc_security_group_ids" { 38 | type = list(string) 39 | description = "(Optional) Security group IDs to allow access to the VPC. It will be used only if vpc_id is set." 40 | default = null 41 | } 42 | 43 | variable "service_cpu" { 44 | type = number 45 | default = 1024 46 | description = "The number of CPU units reserved for the MLflow container." 47 | } 48 | 49 | variable "service_memory" { 50 | type = number 51 | default = 2048 52 | description = "The amount (in MiB) of memory reserved for the MLflow container." 53 | } 54 | 55 | variable "mlflow_username" { 56 | description = "Username used in basic authentication provided by nginx." 57 | type = string 58 | default = "mlflow" 59 | } 60 | 61 | variable "mlflow_password" { 62 | description = "Password used in basic authentication provided by nginx. If not specified, this module will create a strong password for you." 63 | type = string 64 | default = null 65 | } 66 | 67 | variable "mlflow_version" { 68 | description = "The mlflow-server version to use. See github.com/DougTrajano/mlflow-server for the available versions." 69 | type = string 70 | default = "latest" 71 | } 72 | 73 | variable "artifact_bucket_id" { 74 | type = string 75 | default = null 76 | description = "If specified, MLflow will use this bucket to store artifacts. Otherwise, this module will create a dedicated bucket. When overriding this value, you need to enable the task role to access the root you specified." 77 | } 78 | 79 | variable "db_skip_final_snapshot" { 80 | type = bool 81 | default = false 82 | description = "(Optional) If true, this module will not create a final snapshot of the database before terminating." 
83 | } 84 | 85 | variable "db_deletion_protection" { 86 | type = bool 87 | default = true 88 | description = "(Optional) If true, this module will not delete the database after terminating." 89 | } 90 | 91 | variable "db_instance_class" { 92 | type = string 93 | default = "db.t2.micro" 94 | description = "(Optional) The instance type of the RDS instance." 95 | } 96 | 97 | variable "db_subnet_ids" { 98 | type = list(string) 99 | default = null 100 | description = "List of subnets where the RDS database will be deployed" 101 | } 102 | 103 | variable "db_auto_pause" { 104 | type = bool 105 | default = true 106 | description = "If true, the Aurora Serverless cluster will be paused after a given amount of time with no activity. https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless.how-it-works.html#aurora-serverless.how-it-works.pause-resume" 107 | } 108 | 109 | variable "db_auto_pause_seconds" { 110 | type = number 111 | default = 1800 112 | description = "The number of seconds to wait before automatically pausing the Aurora Serverless cluster. This is only used if rds_auto_pause is true." 113 | } 114 | 115 | variable "db_min_capacity" { 116 | type = number 117 | default = 2 118 | description = "The minimum capacity for the Aurora Serverless cluster. Aurora will scale automatically in this range. See: https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless.how-it-works.html" 119 | } 120 | 121 | variable "db_max_capacity" { 122 | type = number 123 | default = 64 124 | description = "The maximum capacity for the Aurora Serverless cluster. Aurora will scale automatically in this range. See: https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless.how-it-works.html" 125 | } --------------------------------------------------------------------------------