├── .gitignore ├── .npmignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── THIRD-PARTY-LICENSES_b5207666-ce30-402b-a927-6d9f859e9ced.txt ├── cdk ├── .gitignore ├── .npmignore ├── README.md ├── bin │ └── app.ts ├── cdk.json ├── jest.config.js ├── lambda │ └── authorizer │ │ ├── index.py │ │ └── requirements.txt ├── lib │ ├── amplify-mlflow-stack.ts │ ├── mlflow-vpc-stack.ts │ ├── rest-api-gateway-stack.ts │ └── sagemaker-studio-user-stack.ts ├── package-lock.json ├── package.json ├── resize-cloud9.sh └── tsconfig.json ├── cognito-mlflow_v1-30-0.patch ├── cognito-mlflow_v2-12-2.patch ├── cognito-mlflow_v2-2-1.patch ├── cognito-mlflow_v2-3-1.patch ├── cognito-mlflow_v2-5-0.patch ├── cognito-mlflow_v2-8-0.patch ├── cognito-mlflow_v2-8-1.patch ├── cognito-mlflow_v2-9-2.patch ├── images ├── amplify-main-branch.png ├── amplify-mlflow-ui-link.png ├── amplify-redeploy-this-version.png ├── amplify-run-first-build.png ├── cognito-user-pool.png ├── enable-models.gif ├── jupyterlab-iframe-search.png ├── mlflow-architecture.png ├── mlflow-cognito.png ├── mlflow-gateway-architecture.png ├── mlflow-gateway-cognito.png ├── mlflow-gateway-sagemaker-cognito.png ├── mlflow-gateway-sagemaker.png ├── mlflow-output-artifacts.png ├── mlflow-sagemaker-cognito.png ├── mlflow-sagemaker-multi-account.png ├── mlflow-sagemaker.png ├── sagemaker-studio-domain.png ├── sm-mlflow-admin.png ├── studio-extension-manager.png └── studio-iframe-mlflow.png ├── lab ├── 1_mlflow-admin-lab.ipynb ├── 2_mlflow-reader-lab.ipynb ├── 3_mlflow-model-approver-lab.ipynb ├── 4_mlflow-gateway.ipynb ├── california_test.csv ├── california_train.csv └── source_dir │ ├── requirements.txt │ ├── setup.py │ └── train.py ├── resize-cloud9.sh └── src ├── cognito └── add_users_and_groups.py ├── mlflow-gateway ├── .dockerignore ├── Dockerfile └── config.yaml └── mlflow-server ├── .dockerignore └── Dockerfile /.gitignore: 
-------------------------------------------------------------------------------- 1 | 2 | !jest.config.js 3 | *.d.ts 4 | **/node_modules/ 5 | **/.vscode/ 6 | 7 | # CDK asset staging directory 8 | .cdk.staging 9 | cdk.out 10 | 11 | # Parcel default cache directory 12 | .parcel-cache 13 | 14 | # Jupyter Notebook 15 | .ipynb_checkpoints 16 | 17 | # Environments 18 | .env 19 | .venv 20 | env/ 21 | venv/ 22 | 23 | **/.DS_Store 24 | 25 | **/mlflow/ 26 | 27 | env.list -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # ChangeLog 2 | 3 | ## 2.8.1 4 | 5 | * upgrade to MLflow `2.8.1` 6 | * provide patch for MLflow UI `2.8.1` 7 | 8 | ## 2.8.0 9 | 10 | * upgrade to MLflow `2.8.0` 11 | * provide patch for MLflow UI `2.8.0` 12 | * add MLflow Gateway AI deployment to CDK stack 13 | * update architecture diagrams to include MLflow Gateway AI 14 | * provide sample [`config.yaml`](src/mlflow-gateway/config.yaml) for adding Amazon Bedrock Claude V2 endpoint to MLflow Gateway AI 15 | * add sample notebook [`lab/4_mlflow-gateway.ipynb`](lab/4_mlflow-gateway.ipynb) -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. 
Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This sample corresponds to the AWS Blog Post [Securing MLflow in AWS: Fine-grained access control with AWS native services](https://aws.amazon.com/blogs/machine-learning/securing-mlflow-in-aws-fine-grained-access-control-with-aws-native-services/) 2 | 3 | We regularly update this repository to align with the latest release of MLflow. 4 | The current latest supported version of MLflow is `2.12.2`, including MLflow tracking server and MLflow Gateway AI. 5 | 6 | Check [CHANGELOG.md](CHANGELOG.md) for the latest changes. 7 | 8 | # Secure MLflow in AWS with native AWS services 9 | 10 | We aim to demonstrate how it is possible to achieve AuthN/AuthZ in MLflow with a hybrid architecture using different AWS services to enable end-to-end Machine Learning workflows. 
11 | Specifically, we look at Amazon SageMaker and MLflow, and how they can be integrated securely without worrying about managing credentials by using IAM Roles and temporary credentials. 12 | 13 | ## Custom authentication and authorization on MLflow 14 | 15 | This sample shows how to do the following: 16 | 17 | * How to deploy MLflow tracking server and MLflow gateway AI on a serverless architecture (we build on top of [running MLflow on Fargate](https://github.com/aws-samples/amazon-sagemaker-mlflow-fargate)) 18 | * How to expose a MLflow tracking server and MLflow gateway AI via private integrations to an Amazon API Gateway (we build on top of [running MLflow on AWS](https://github.com/aws-samples/aws-mlflow-sagemaker-cdk)) 19 | * How to add authentication and authorization for programmatic access and browser access to MLflow 20 | * How to access MLflow via SageMaker using SageMaker Execution Roles 21 | 22 | Due to its modularity, this sample can be extended in a number of ways, and we will provide guidance on how to do so. 23 | 24 | ## Architecture 25 | 26 | This sample is made of 4 different stacks: 27 | * [`MLflowVPCStack`](./cdk/lib/mlflow-vpc-stack.ts) 28 | * deploys MLfLow tracking server and MLflow gateway AI on a serverless infrastructure running on ECS and Fargate on a private subnet 29 | * deploys an Aurora Serverless database for the data store and S3 for the artifact store. 30 | * [`RestApiGatewayStack`](./cdk/lib/rest-api-gateway-stack.ts) 31 | * exposes the MLFlow server and gateway AI via a PrivateLink to an REST API Gateway. 32 | * deploys a Cognito User Pool to manage the users accessing the UI. 33 | * deploy a Lambda Authorizer to verify the JWT token with the Cognito User Pool ID keys and returns IAM policies to allow or deny a request. 34 | * adds IAM Authorizer for the MLflow client SDK routes. 
35 | * [`AmplifyMLflowStack`](./cdk/lib/amplify-mlflow-stack.ts) 36 | * creates an app with CI/CD capability to deploy the MLFLow UI 37 | * [`SageMakerStudioUserStack`](./cdk/lib/sagemaker-studio-user-stack.ts) 38 | * deploys a SageMaker Studio domain (if not existing). 39 | * adds three users, each one with a different SageMaker execution role implementing different access levels: 40 | * `mlflow-admin` -> admin-like permissions to the MLFlow resources 41 | * `mlflow-reader` -> read-only admin to the MLFlow resources 42 | * `mlflow-model-approver` -> same permissions as `mlflow-reader` plus can register new models from existing runs, and promote existing registered models to new stages in MLflow 43 | 44 | Our proposed architecture is shown in Fig. 1 45 | 46 | ![Architecture](./images/mlflow-gateway-architecture.png) 47 | *Fig. 1 - MLflow on AWS architecture diagram* 48 | 49 | ## Prerequisites 50 | * Access to an AWS account with Admin permissions and credentials correctly set 51 | * Docker 52 | * Python 3.8 53 | 54 | ## Deployment 55 | 56 | ### Create and configure AWS Cloud9 environment 57 | 58 | Log into the AWS Management Console and search for [Cloud9](https://aws.amazon.com/cloud9/) in the search bar. 59 | Click Cloud9 and create an AWS Cloud9 environment based on Amazon Linux 2 in your region. 60 | For the instance type, we tested with a `t3.large`, but you can very likely use a Free-Tier eligible instance. 
61 | 62 | ### Provisioning AWS resources using the AWS CDK 63 | 64 | #### Clone the GitHub repository 65 | 66 | Open a new terminal inside AWS Cloud9 IDE and run: 67 | ```bash 68 | git clone https://github.com/aws-samples/sagemaker-studio-mlflow-integration.git 69 | ``` 70 | 71 | #### Setting the expected ENV variables 72 | 73 | The CDK script expects the following ENV variables to be set 74 | 75 | ```bash 76 | AWS_REGION= 77 | AWS_ACCOUNT= 78 | ``` 79 | 80 | If you would like to use an existing SageMaker Studio domain, please set this ENV variable 81 | 82 | ```bash 83 | DOMAIN_ID= 84 | ``` 85 | 86 | The default region used by the CDK app is `us-west-2`. 87 | You can change the default region by setting up the `AWS_REGION` environment variable. 88 | When working on Cloud9, you can specify the same region where your Cloud9 environment is running as follow: 89 | 90 | ```bash 91 | sudo yum install jq -y 92 | export AWS_REGION=$(curl -s 169.254.169.254/latest/dynamic/instance-identity/document | jq -r '.region') 93 | echo "export AWS_REGION=${AWS_REGION}" | tee -a ~/.bash_profile 94 | export AWS_ACCOUNT=$(aws sts get-caller-identity --query Account --output text) 95 | echo "export AWS_ACCOUNT=${AWS_ACCOUNT}" | tee -a ~/.bash_profile 96 | ``` 97 | 98 | The CDK script expects to find the ENV variable `DOMAIN_ID` in order to figure out if a new SageMaker Studio domain is needed or not. 99 | ```bash 100 | export DOMAIN_ID=$(aws sagemaker list-domains | jq -r 'select(.Domains[0] != null) .Domains[0].DomainId | tostring') 101 | echo "export DOMAIN_ID=${DOMAIN_ID}" | tee -a ~/.bash_profile 102 | ``` 103 | 104 | #### Apply patch to MLflow-UI 105 | 106 | MLflow UI does not support any login workflow, nonetheless mechanisms to set the proper headers to authenticated API calls against a backend service. 107 | Amplify provides libraries that can be used to quickly add a login workflow, and to easily manage the lifecycle of the authentication tokens. 
108 | We provide you a patch to be applied on top of MLflow `2.12.2` that adds Amplify React Components for authentication and how to add `Authorization` header with a `Bearer` token for every backend API call. 109 | The patch we provided can be checked [here](./cognito-mlflow_v2-12-2.patch) and it will enable a login flow backed by Amazon Cognito as shown in Fig. 2. 110 | **Note: we also provide patches for previous versions of MLflow 111 | 112 | * `1.30.0`. If you want to install that version, you need to ensure mlflow `1.30.0` [here](./cognito-mlflow_v1-30-0.patch) installed throughout this sample, and you adapt the lab sample to work with that same version as the SDK for deploying a model to SageMaker has changed** 113 | * `2.2.1`. If you want to install that version, you need to ensure mlflow `2.2.1` [here](./cognito-mlflow_v2-2-1.patch) installed throughout this sample, and you adapt the lab sample to work with that same version as the SDK for deploying a model to SageMaker has changed** 114 | * `2.3.1`. If you want to install that version, you need to ensure mlflow `2.3.1` [here](./cognito-mlflow_v2-3-1.patch) installed throughout this sample, and you adapt the lab sample to work with that same version as the SDK for deploying a model to SageMaker has changed** 115 | * `2.5.0`. If you want to install that version, you need to ensure mlflow `2.5.0` [here](./cognito-mlflow_v2-5-0.patch) installed throughout this sample, and you adapt the lab sample to work with that same version as the SDK for deploying a model to SageMaker has changed** 116 | * `2.8.0`. If you want to install that version, you need to ensure mlflow `2.8.0` [here](./cognito-mlflow_v2-8-0.patch) installed throughout this sample, and you adapt the lab sample to work with that same version as the SDK for deploying a model to SageMaker has changed** 117 | * `2.8.1`. 
If you want to install that version, you need to ensure mlflow `2.8.1` [here](./cognito-mlflow_v2-8-1.patch) installed throughout this sample, and you adapt the lab sample to work with that same version as the SDK for deploying a model to SageMaker has changed** 118 | 119 | ```bash 120 | cd ~/environment/sagemaker-studio-mlflow-integration/ 121 | git clone --depth 1 --branch v2.12.2 https://github.com/mlflow/mlflow.git 122 | cd mlflow 123 | git am ../cognito-mlflow_v2-12-2.patch 124 | ``` 125 | 126 | #### Resizing the Cloud9 127 | Before deploying, since we use CDK constructs to build the container images locally, we need a larger disk size than the one provided by Cloud9 in its default environment configuration (i.e. 20GB, which is not enough). 128 | To resize it on the fly without rebooting the instance, you can run the following script specifying a new desired size. 129 | 130 | ```bash 131 | cd ~/environment/sagemaker-studio-mlflow-integration/ 132 | ./resize-cloud9.sh 100 133 | ``` 134 | Where `100` represents the new desired disk size in GB. 135 | 136 | #### Install and bootstrap AWS CDK 137 | 138 | The AWS Cloud Development Kit (AWS CDK) is an open-source software development framework to model and provision your cloud application resources using familiar programming languages. 139 | If you would like to familiarize yourself, the [CDKWorkshop](https://cdkworkshop.com/) is a great place to start. 140 | 141 | Using the Cloud9 environment, open a new Terminal and use the following commands: 142 | ```bash 143 | cd ~/environment/sagemaker-studio-mlflow-integration/cdk 144 | npm install -g aws-cdk@2.104.0 --force 145 | cdk --version 146 | ``` 147 | 148 | Take a note of the latest version that you install, at the time of writing this post it is `2.104.0`. 149 | Open the package.json file and replace the version “2.104.0” of the following modules with the latest version that you have installed above. 
150 | 151 | ```typescript 152 | "aws-cdk-lib": "2.104.0", 153 | "@aws-cdk/aws-amplify-alpha": "2.104.0-alpha.0", 154 | "@aws-cdk/aws-cognito-identitypool-alpha": "2.104.0-alpha.0", 155 | "@aws-cdk/aws-lambda-python-alpha": "2.104.0-alpha.0", 156 | ``` 157 | 158 | This will install all the latest CDK modules under the `node_modules` directory (`npm install`) and prepare your AWS account to deploy resources with CDK (`cdk bootstrap`). 159 | 160 | ```bash 161 | cd ~/environment/sagemaker-studio-mlflow-integration/cdk 162 | npm install 163 | cdk bootstrap 164 | ``` 165 | 166 | ## Provisioning AWS resources using AWS CDK 167 | 168 | Now we are ready to deploy our full solution. 169 | ```bash 170 | cdk deploy --all --require-approval never 171 | ``` 172 | 173 | To run this sample, we reccommend to deploy all 4 Stacks to test out the SageMaker integration. 174 | However, if you are only interested in the MLflow deployment (MLflow server, MLflow UI, and REST API Gateway), you can deploy only the first three stacks, i.e. [`MLflowVPCStack`](./cdk/lib/mlflow-vpc-stack.ts), [`RestApiGatewayStack`](./cdk/lib/rest-api-gateway-stack.ts) and [`AmplifyMLflowStack`](./cdk/lib/amplify-mlflow-stack.ts). 175 | 176 | ## Amazon Bedrock requirements 177 | **Base Models Access** 178 | 179 | If you are looking to interact with models from Amazon Bedrock, you need to [request access to the base models in one of the regions where Amazon Bedrock is available](https://console.aws.amazon.com/bedrock/home?#/modelaccess). 180 | Make sure to read and accept models' end-user license agreements or EULA. 181 | 182 | Note: 183 | - You can deploy the solution to a different region from where you requested Base Model access. To do that, adapt the [config.yaml](src/mlflow-gateway/config.yaml) and redeploy. 
184 | - **While the Base Model access approval is instant, it might take several minutes to get access and see the list of models in the UI.** 185 | 186 | ![sample](images/enable-models.gif) 187 | (Gif taken from https://github.com/aws-samples/aws-genai-llm-chatbot) 188 | 189 | 190 | ## Cognito User Pool and Lambda Authorizer 191 | 192 | We have provided a script that will populate the Cognito User Pool with 3 users, each belonging to a different group. 193 | To execute the script, please run the following command. 194 | The script will prompt you to enter your desired password. 195 | Please ensure that the password you pick respects the password policy defined for [Cognito](https://github.com/aws-samples/sagemaker-studio-mlflow-integration/blob/449d5557ee6edeb4c7f17ce5f2dfa310e5684ee6/cdk/lib/rest-api-gateway-stack.ts#L40) 196 | 197 | ```bash 198 | cd ~/environment/sagemaker-studio-mlflow-integration/src/cognito/ 199 | python add_users_and_groups.py 200 | ``` 201 | To check the script code [here](./src/cognito/add_users_and_groups.py). 202 | 203 | After running the script, if you check the Cognito User Pool in the console you should see the three users created 204 | 205 | ![CognitoUsers](./images/cognito-user-pool.png) 206 | *Fig. 2 - Cognito users in the Cognito User Pool.* 207 | 208 | On the REST API Gateway side, the Lambda Authorizer will first verify the signature of the token using the Cognito User Pool Key, verify the claims, and only after that, it will extract the cognito group the user belongs to from the claim in JWT token (i.e., `cognito:groups`), and apply different permissions based on the group itself that we have programmed. 209 | For our specific case, we have three groups: 210 | * `admins` - can see and can edit everything 211 | * `readers` - can only read everything 212 | * `model-approvers` - same as `readers` plus permissions to register models, create model versions, and update models to different stages. 
213 | 214 | Depending on the group, the Lambda Authorizer will generate different IAM Policies. 215 | This is just an example on how authorization can be achieved, in fact, with a Lambda Authorizer, you can implement any logic you want. 216 | If you want to restrict only a subset of actions, you need to be aware of the MLFlow REST API definition, which can be found [here](https://www.mlflow.org/docs/latest/rest-api.html) 217 | The code for the Lambda Authorizer can be explored [here](./cdk/lambda/authorizer/index.py) 218 | 219 | ![MLflowCognito](./images/mlflow-gateway-cognito.png) 220 | *Fig. 3 - MLflow login flow using AWS Amplify, Amazon Cognito and Lambda Authorizer on the API Gateway* 221 | 222 | ## *Integration with SageMaker* 223 | 224 | One of the key aspects of this sample is the integration with SageMaker. 225 | Permissions in SageMaker are managed via IAM Roles, for SageMaker also called Execution Roles that are associated to the service when in use (both when using SageMaker Studio, or the SageMaker managed infrastructure). 226 | By allowing the API Gateway to use IAM authentication on the `/api/`, we can do exactly that. 227 | 228 | ### Provision a new SageMaker Studio domain 229 | 230 | Provisioning a new SageMaker Studio domain will do the following operations: 231 | 232 | * Create a new SageMaker Studio domain in the default VPC. (unless already existing) 233 | * Create three new SageMaker Studio users attached to the domain and three different execution roles created and attached to them. These execution roles have the same permissions that the Lambda Authorizer applies to the different groups. 
* `mlflow-admin` - has associated an execution role with similar permissions as the user in the cognito group `admins` 235 | * `mlflow-reader` - has associated an execution role with similar permissions as the user in the cognito group `readers` 236 | * `mlflow-model-approver` - has associated an execution role with similar permissions as the user in the cognito group `model-approvers` 237 | 238 | ![MLflowSageMaker](./images/mlflow-gateway-sagemaker.png) 239 | *Fig. 3 - Accessing MLflow from SageMaker Studio and SageMaker Training Jobs using IAM Roles* 240 | 241 | ### Push the `mlflow-pyfunc` container to ECR 242 | 243 | #### Ensure Python 3.8 (or greater) is installed 244 | 245 | In order to deploy an mlflow model to SageMaker, you need to create a serving container that implements what the SageMaker runtime expects to find. 246 | MLflow makes this effort easier by providing a CLI command that builds the image locally and pushes it to your ECR. 247 | Most recent versions of MLflow have dependencies on `Python 3.8`. 248 | 249 | ```bash 250 | python --version 251 | ``` 252 | 253 | If running this sample on Cloud9, you need to ensure you have Python `3.8` installed. 
You can follow these instructions on how to do it 255 | ```bash 256 | sudo yum install -y amazon-linux-extras 257 | sudo amazon-linux-extras enable python3.8 258 | sudo yum install -y python3.8 259 | ``` 260 | 261 | #### Push the `mlflow-pyfunc` container to ECR 262 | 263 | If on Cloud9, run the following (after installing Python 3.8) 264 | ```bash 265 | # install the libraries 266 | pip3.8 install mlflow==2.12.2 boto3 # or pip install mlflow==2.12.2 boto3 if your default pip comes alongside a python version >= 3.8 267 | ``` 268 | 269 | ```bash 270 | # build and push the container to ECR into your account 271 | mlflow sagemaker build-and-push-container 272 | ``` 273 | 274 | ### Accessing the MLflow UI 275 | Before accessing the MLflow UI, we need to ensure the first build got successfully executed. 276 | Navigate to the Amplify console, and select the `MLflow-UI` app that we have created. 277 | 278 | Once the build completes (might take some time) you can access the MLFlow UI from the link provided by Amplify as shown in Fig. 5. 279 | 280 | ![AmplifyMLflowUI](./images/amplify-mlflow-ui-link.png) 281 | *Fig. 4 - Retrieve the URL of the MLflow UI* 282 | 283 | There might be cases when the first Amplify build fails. 284 | If this is the case, you should manually re-deploy the Amplify build by navigating to the failed build. 285 | You first select the `main` branch 286 | 287 | ![AmplifyMainBranch](./images/amplify-main-branch.png) 288 | *Fig. 5 - Navigate to the Amplify `main` branch* 289 | 290 | and then click on the "Redeploy this version". 291 | 292 | ![AmplifyRedeployThisVersion](./images/amplify-redeploy-this-version.png) 293 | *Fig. 6 - Redeploy the same failed build* 294 | 295 | After a few minutes, you should see the successful build. 296 | 297 | ### MLflow / Amazon SageMaker Studio integration lab 298 | 299 | In the AWS console, navigate to Amazon SageMaker Studio and open Studio for the `mlflow-admin` user as shown in the pictures below. 
300 | 301 | ![SageMakerStudio](./images/sagemaker-studio-domain.png) 302 | *Fig 6 - Navigate to Amazon SageMaker Studio* 303 | 304 | ![SageMakerStudioUser](./images/sm-mlflow-admin.png) 305 | *Fig 7 - Launch Amazon SageMaker Studio for the `mlflow-admin`* 306 | 307 | Clone this repository either from the terminal or from the SageMaker Studio UI. 308 | 309 | ```bash 310 | git clone https://github.com/aws-samples/sagemaker-studio-mlflow-integration.git 311 | ``` 312 | 313 | ## Labs 314 | We provide three labs located in the `./sagemaker-studio-mlflow-integration/lab/` folder. 315 | When running the labs, please make sure the kernel selected is `Base Python 2.0` (it should be selected by default). 316 | 1. [`1_mlflow-admin-lab.ipynb`](./lab/1_mlflow-admin-lab.ipynb) For this lab, please use the `mlflow-admin` user profile created for you in SageMaker Studio. In this lab you will test admin permissions. In here we access MLflow from both SageMaker Studio, and from a SageMaker Training Job using the execution role assigned to the user profile `mlflow-admin`. Once the training is completed, we further show how to register models, create model versions from the artifact, and download locally the artifacts for testing purposes. Finally, we show how to deploy the model on the SageMaker Managed infrastructure. Furthermore, the lab shows how you can enrich MLflow metadata with SageMaker metadata, and vice versa, by storing MLflow specifics in SageMaker via SageMaker Experiments SDK and visualize them in the SageMaker Studio UI. 317 | 2. [`2_mlflow-reader-lab.ipynb`](./lab/2_mlflow-reader-lab.ipynb) For this lab, please use the `mlflow-reader` user profile created for you in SageMaker Studio. In this lab you will test read-like permissions. You can see details about every experiment, every run, as well as registered models and model versions, however you cannot modify / create new entities. 318 | 3. 
[`3_mlflow-model-approver-lab.ipynb`](./lab/3_mlflow-model-approver-lab.ipynb) For this lab, please use the `mlflow-model-approver` user profile created for you in SageMaker Studio. In this lab you will test the permissions to register new models and new model versions. 319 | 320 | ## Render MLflow within SageMaker Studio 321 | 322 | SageMaker Studio is based upon Jupyter Lab, and it offers the same flexibility to extend its capabilities thanks to Jupyter extensions. 323 | You have the possibility to build your own extension, or to access one of the existing ones via the "Extension Manager" (see Fig. 9). 324 | 325 | ![studio-extension-manager](./images/studio-extension-manager.png) 326 | 327 | *Fig. 9 - Enable Extension Manager in SageMaker Studio* 328 | 329 | For our exercise, the [`jupyterlab-iframe`](https://pypi.org/project/jupyterlab-iframe/) extension provides us the capability to render websites within an iframe. 330 | To install, you can either follow the instructions in the extension documentation, or install it via the Extension Manager. 331 | 332 | Once successfully installed, from the SageMaker Studio menu, `View`->`Activate Command Palette` dialog, search for "iframe" as in figure 333 | 334 | ![jupyterlab-iframe-search](./images/jupyterlab-iframe-search.png) 335 | 336 | *Fig. 10 - Open the jupyterlab-iframe dialog* 337 | 338 | Finally, set the MLflow UI URL generated by Amplify and open the tab. 339 | You can now access MLflow UI without leaving the SageMaker Studio UI using the same set of credentials you have stored in Amazon Cognito as shown in Fig. 11 340 | 341 | ![studio-iframe-mlflow](./images/studio-iframe-mlflow.png) 342 | 343 | *Fig. 11 - Access MLflow UI from within SageMaker Studio* 344 | 345 | ## Cleanup 346 | 347 | You can destroy the CDK stack by running the following command: 348 | 349 | ```bash 350 | cd ~/environment/sagemaker-studio-mlflow-integration/cdk 351 | cdk destroy --all 352 | ``` 353 | 354 | At the prompt, enter `y`. 
355 | 356 | There might be cases when the cleanup might not work. 357 | Usually, this is due to the creation of different SageMaker Studio KernelApps than the ones have been provisioned by the CDK stack. 358 | In this case, you should first delete all `KernelApp` on all user profiles manually, and then try again to destroy the stack as explained earlier. 359 | 360 | Cost of just running this sample: < 10$. 361 | The biggest cost driver in this sample are the 3 `KernelGateway` apps initialized for the SageMaker Studio Domain. 362 | To save costs, you can delete the 3 `KernelGateway` apps, one for each user profile, that spins up a `ml.t3.medium` instance type each. 363 | They can be deleted from the console, and they are named as `instance-mlflow-basepython-2-0-ml-t3-medium`. 364 | Alternatively, you could install the [sagemaker-studio-auto-shutdown-extension](https://github.com/aws-samples/sagemaker-studio-auto-shutdown-extension) to save on costs. 365 | 366 | ## Conclusion 367 | 368 | We have shown how you can add authentication and authorization to a single tenent MLflow serverless installation with minimal code changes to MLflow. 369 | The highlight of this exercise is the authentication to an MLflow tracking server via IAM Roles within SageMaker, leveraging the security the IAM carries with it. 370 | -------------------------------------------------------------------------------- /THIRD-PARTY-LICENSES_b5207666-ce30-402b-a927-6d9f859e9ced.txt: -------------------------------------------------------------------------------- 1 | ** aws-cdk; version 2.63.2 -- https://github.com/aws/aws-cdk 2 | ** mlflow; version 1.30.0 -- https://github.com/mlflow/mlflow/tree/v1.30.0 3 | 4 | Apache License 5 | Version 2.0, January 2004 6 | http://www.apache.org/licenses/ 7 | 8 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 9 | 10 | 1. Definitions. 
11 | 12 | "License" shall mean the terms and conditions for use, reproduction, and 13 | distribution as defined by Sections 1 through 9 of this document. 14 | 15 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 16 | owner that is granting the License. 17 | 18 | "Legal Entity" shall mean the union of the acting entity and all other entities 19 | that control, are controlled by, or are under common control with that entity. 20 | For the purposes of this definition, "control" means (i) the power, direct or 21 | indirect, to cause the direction or management of such entity, whether by 22 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity exercising 26 | permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, including 29 | but not limited to software source code, documentation source, and configuration 30 | files. 31 | 32 | "Object" form shall mean any form resulting from mechanical transformation or 33 | translation of a Source form, including but not limited to compiled object code, 34 | generated documentation, and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or Object form, made 37 | available under the License, as indicated by a copyright notice that is included 38 | in or attached to the work (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object form, that 41 | is based on (or derived from) the Work and for which the editorial revisions, 42 | annotations, elaborations, or other modifications represent, as a whole, an 43 | original work of authorship. 
For the purposes of this License, Derivative Works 44 | shall not include works that remain separable from, or merely link (or bind by 45 | name) to the interfaces of, the Work and Derivative Works thereof. 46 | 47 | "Contribution" shall mean any work of authorship, including the original version 48 | of the Work and any modifications or additions to that Work or Derivative Works 49 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 50 | by the copyright owner or by an individual or Legal Entity authorized to submit 51 | on behalf of the copyright owner. For the purposes of this definition, 52 | "submitted" means any form of electronic, verbal, or written communication sent 53 | to the Licensor or its representatives, including but not limited to 54 | communication on electronic mailing lists, source code control systems, and 55 | issue tracking systems that are managed by, or on behalf of, the Licensor for 56 | the purpose of discussing and improving the Work, but excluding communication 57 | that is conspicuously marked or otherwise designated in writing by the copyright 58 | owner as "Not a Contribution." 59 | 60 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 61 | of whom a Contribution has been received by Licensor and subsequently 62 | incorporated within the Work. 63 | 64 | 2. Grant of Copyright License. Subject to the terms and conditions of this 65 | License, each Contributor hereby grants to You a perpetual, worldwide, non- 66 | exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, 67 | prepare Derivative Works of, publicly display, publicly perform, sublicense, and 68 | distribute the Work and such Derivative Works in Source or Object form. 69 | 70 | 3. Grant of Patent License. 
Subject to the terms and conditions of this License, 71 | each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no- 72 | charge, royalty-free, irrevocable (except as stated in this section) patent 73 | license to make, have made, use, offer to sell, sell, import, and otherwise 74 | transfer the Work, where such license applies only to those patent claims 75 | licensable by such Contributor that are necessarily infringed by their 76 | Contribution(s) alone or by combination of their Contribution(s) with the Work 77 | to which such Contribution(s) was submitted. If You institute patent litigation 78 | against any entity (including a cross-claim or counterclaim in a lawsuit) 79 | alleging that the Work or a Contribution incorporated within the Work 80 | constitutes direct or contributory patent infringement, then any patent licenses 81 | granted to You under this License for that Work shall terminate as of the date 82 | such litigation is filed. 83 | 84 | 4. Redistribution. 
You may reproduce and distribute copies of the Work or 85 | Derivative Works thereof in any medium, with or without modifications, and in 86 | Source or Object form, provided that You meet the following conditions: 87 | 88 | (a) You must give any other recipients of the Work or Derivative Works a 89 | copy of this License; and 90 | 91 | (b) You must cause any modified files to carry prominent notices stating 92 | that You changed the files; and 93 | 94 | (c) You must retain, in the Source form of any Derivative Works that You 95 | distribute, all copyright, patent, trademark, and attribution notices from the 96 | Source form of the Work, excluding those notices that do not pertain to any part 97 | of the Derivative Works; and 98 | 99 | (d) If the Work includes a "NOTICE" text file as part of its distribution, 100 | then any Derivative Works that You distribute must include a readable copy of 101 | the attribution notices contained within such NOTICE file, excluding those 102 | notices that do not pertain to any part of the Derivative Works, in at least one 103 | of the following places: within a NOTICE text file distributed as part of the 104 | Derivative Works; within the Source form or documentation, if provided along 105 | with the Derivative Works; or, within a display generated by the Derivative 106 | Works, if and wherever such third-party notices normally appear. The contents of 107 | the NOTICE file are for informational purposes only and do not modify the 108 | License. You may add Your own attribution notices within Derivative Works that 109 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 110 | provided that such additional attribution notices cannot be construed as 111 | modifying the License. 
112 | 113 | You may add Your own copyright statement to Your modifications and may 114 | provide additional or different license terms and conditions for use, 115 | reproduction, or distribution of Your modifications, or for any such Derivative 116 | Works as a whole, provided Your use, reproduction, and distribution of the Work 117 | otherwise complies with the conditions stated in this License. 118 | 119 | 5. Submission of Contributions. Unless You explicitly state otherwise, any 120 | Contribution intentionally submitted for inclusion in the Work by You to the 121 | Licensor shall be under the terms and conditions of this License, without any 122 | additional terms or conditions. Notwithstanding the above, nothing herein shall 123 | supersede or modify the terms of any separate license agreement you may have 124 | executed with Licensor regarding such Contributions. 125 | 126 | 6. Trademarks. This License does not grant permission to use the trade names, 127 | trademarks, service marks, or product names of the Licensor, except as required 128 | for reasonable and customary use in describing the origin of the Work and 129 | reproducing the content of the NOTICE file. 130 | 131 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in 132 | writing, Licensor provides the Work (and each Contributor provides its 133 | Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 134 | KIND, either express or implied, including, without limitation, any warranties 135 | or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 136 | PARTICULAR PURPOSE. You are solely responsible for determining the 137 | appropriateness of using or redistributing the Work and assume any risks 138 | associated with Your exercise of permissions under this License. 139 | 140 | 8. Limitation of Liability. 
In no event and under no legal theory, whether in 141 | tort (including negligence), contract, or otherwise, unless required by 142 | applicable law (such as deliberate and grossly negligent acts) or agreed to in 143 | writing, shall any Contributor be liable to You for damages, including any 144 | direct, indirect, special, incidental, or consequential damages of any character 145 | arising as a result of this License or out of the use or inability to use the 146 | Work (including but not limited to damages for loss of goodwill, work stoppage, 147 | computer failure or malfunction, or any and all other commercial damages or 148 | losses), even if such Contributor has been advised of the possibility of such 149 | damages. 150 | 151 | 9. Accepting Warranty or Additional Liability. While redistributing the Work or 152 | Derivative Works thereof, You may choose to offer, and charge a fee for, 153 | acceptance of support, warranty, indemnity, or other liability obligations 154 | and/or rights consistent with this License. However, in accepting such 155 | obligations, You may act only on Your own behalf and on Your sole 156 | responsibility, not on behalf of any other Contributor, and only if You agree to 157 | indemnify, defend, and hold each Contributor harmless for any liability incurred 158 | by, or claims asserted against, such Contributor by reason of your accepting any 159 | such warranty or additional liability. 160 | 161 | END OF TERMS AND CONDITIONS 162 | 163 | APPENDIX: How to apply the Apache License to your work. 164 | 165 | To apply the Apache License to your work, attach the following boilerplate 166 | notice, with the fields enclosed by brackets "[]" replaced with your own 167 | identifying information. (Don't include the brackets!) The text should be 168 | enclosed in the appropriate comment syntax for the file format. 
We also 169 | recommend that a file or class name and description of purpose be included on 170 | the same "printed page" as the copyright notice for easier identification within 171 | third-party archives. 172 | 173 | Copyright [yyyy] [name of copyright owner] 174 | 175 | Licensed under the Apache License, Version 2.0 (the "License"); 176 | you may not use this file except in compliance with the License. 177 | You may obtain a copy of the License at 178 | 179 | http://www.apache.org/licenses/LICENSE-2.0 180 | 181 | Unless required by applicable law or agreed to in writing, software 182 | distributed under the License is distributed on an "AS IS" BASIS, 183 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 184 | See the License for the specific language governing permissions and 185 | limitations under the License. 186 | 187 | * For aws-cdk see also this required NOTICE: 188 | 189 | AWS Cloud Development Kit (AWS CDK) 190 | Copyright 2018-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 191 | * For mlflow see also this required NOTICE: 192 | Copyright 2018 Databricks, Inc. All rights reserved. 193 | 194 | Apache License 195 | Version 2.0, January 2004 196 | http://www.apache.org/licenses/ 197 | 198 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 199 | 200 | 1. Definitions. 201 | 202 | "License" shall mean the terms and conditions for use, reproduction, 203 | and distribution as defined by Sections 1 through 9 of this document. 204 | 205 | "Licensor" shall mean the copyright owner or entity authorized by 206 | the copyright owner that is granting the License. 207 | 208 | "Legal Entity" shall mean the union of the acting entity and all 209 | other entities that control, are controlled by, or are under common 210 | control with that entity. 
For the purposes of this definition, 211 | "control" means (i) the power, direct or indirect, to cause the 212 | direction or management of such entity, whether by contract or 213 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 214 | outstanding shares, or (iii) beneficial ownership of such entity. 215 | 216 | "You" (or "Your") shall mean an individual or Legal Entity 217 | exercising permissions granted by this License. 218 | 219 | "Source" form shall mean the preferred form for making modifications, 220 | including but not limited to software source code, documentation 221 | source, and configuration files. 222 | 223 | "Object" form shall mean any form resulting from mechanical 224 | transformation or translation of a Source form, including but 225 | not limited to compiled object code, generated documentation, 226 | and conversions to other media types. 227 | 228 | "Work" shall mean the work of authorship, whether in Source or 229 | Object form, made available under the License, as indicated by a 230 | copyright notice that is included in or attached to the work 231 | (an example is provided in the Appendix below). 232 | 233 | "Derivative Works" shall mean any work, whether in Source or Object 234 | form, that is based on (or derived from) the Work and for which the 235 | editorial revisions, annotations, elaborations, or other modifications 236 | represent, as a whole, an original work of authorship. For the 237 | purposes 238 | of this License, Derivative Works shall not include works that remain 239 | separable from, or merely link (or bind by name) to the interfaces of, 240 | the Work and Derivative Works thereof. 
241 | 242 | "Contribution" shall mean any work of authorship, including 243 | the original version of the Work and any modifications or additions 244 | to that Work or Derivative Works thereof, that is intentionally 245 | submitted to Licensor for inclusion in the Work by the copyright owner 246 | or by an individual or Legal Entity authorized to submit on behalf of 247 | the copyright owner. For the purposes of this definition, "submitted" 248 | means any form of electronic, verbal, or written communication sent 249 | to the Licensor or its representatives, including but not limited to 250 | communication on electronic mailing lists, source code control 251 | systems, 252 | and issue tracking systems that are managed by, or on behalf of, the 253 | Licensor for the purpose of discussing and improving the Work, but 254 | excluding communication that is conspicuously marked or otherwise 255 | designated in writing by the copyright owner as "Not a Contribution." 256 | 257 | "Contributor" shall mean Licensor and any individual or Legal Entity 258 | on behalf of whom a Contribution has been received by Licensor and 259 | subsequently incorporated within the Work. 260 | 261 | 2. Grant of Copyright License. Subject to the terms and conditions of 262 | this License, each Contributor hereby grants to You a perpetual, 263 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 264 | copyright license to reproduce, prepare Derivative Works of, 265 | publicly display, publicly perform, sublicense, and distribute the 266 | Work and such Derivative Works in Source or Object form. 267 | 268 | 3. Grant of Patent License. 
Subject to the terms and conditions of 269 | this License, each Contributor hereby grants to You a perpetual, 270 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 271 | (except as stated in this section) patent license to make, have made, 272 | use, offer to sell, sell, import, and otherwise transfer the Work, 273 | where such license applies only to those patent claims licensable 274 | by such Contributor that are necessarily infringed by their 275 | Contribution(s) alone or by combination of their Contribution(s) 276 | with the Work to which such Contribution(s) was submitted. If You 277 | institute patent litigation against any entity (including a 278 | cross-claim or counterclaim in a lawsuit) alleging that the Work 279 | or a Contribution incorporated within the Work constitutes direct 280 | or contributory patent infringement, then any patent licenses 281 | granted to You under this License for that Work shall terminate 282 | as of the date such litigation is filed. 283 | 284 | 4. Redistribution. 
You may reproduce and distribute copies of the 285 | Work or Derivative Works thereof in any medium, with or without 286 | modifications, and in Source or Object form, provided that You 287 | meet the following conditions: 288 | 289 | (a) You must give any other recipients of the Work or 290 | Derivative Works a copy of this License; and 291 | 292 | (b) You must cause any modified files to carry prominent notices 293 | stating that You changed the files; and 294 | 295 | (c) You must retain, in the Source form of any Derivative Works 296 | that You distribute, all copyright, patent, trademark, and 297 | attribution notices from the Source form of the Work, 298 | excluding those notices that do not pertain to any part of 299 | the Derivative Works; and 300 | 301 | (d) If the Work includes a "NOTICE" text file as part of its 302 | distribution, then any Derivative Works that You distribute must 303 | include a readable copy of the attribution notices contained 304 | within such NOTICE file, excluding those notices that do not 305 | pertain to any part of the Derivative Works, in at least one 306 | of the following places: within a NOTICE text file distributed 307 | as part of the Derivative Works; within the Source form or 308 | documentation, if provided along with the Derivative Works; or, 309 | within a display generated by the Derivative Works, if and 310 | wherever such third-party notices normally appear. The contents 311 | of the NOTICE file are for informational purposes only and 312 | do not modify the License. You may add Your own attribution 313 | notices within Derivative Works that You distribute, alongside 314 | or as an addendum to the NOTICE text from the Work, provided 315 | that such additional attribution notices cannot be construed 316 | as modifying the License. 
317 | 318 | You may add Your own copyright statement to Your modifications and 319 | may provide additional or different license terms and conditions 320 | for use, reproduction, or distribution of Your modifications, or 321 | for any such Derivative Works as a whole, provided Your use, 322 | reproduction, and distribution of the Work otherwise complies with 323 | the conditions stated in this License. 324 | 325 | 5. Submission of Contributions. Unless You explicitly state otherwise, 326 | any Contribution intentionally submitted for inclusion in the Work 327 | by You to the Licensor shall be under the terms and conditions of 328 | this License, without any additional terms or conditions. 329 | Notwithstanding the above, nothing herein shall supersede or modify 330 | the terms of any separate license agreement you may have executed 331 | with Licensor regarding such Contributions. 332 | 333 | 6. Trademarks. This License does not grant permission to use the trade 334 | names, trademarks, service marks, or product names of the Licensor, 335 | except as required for reasonable and customary use in describing the 336 | origin of the Work and reproducing the content of the NOTICE file. 337 | 338 | 7. Disclaimer of Warranty. Unless required by applicable law or 339 | agreed to in writing, Licensor provides the Work (and each 340 | Contributor provides its Contributions) on an "AS IS" BASIS, 341 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 342 | implied, including, without limitation, any warranties or conditions 343 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 344 | PARTICULAR PURPOSE. You are solely responsible for determining the 345 | appropriateness of using or redistributing the Work and assume any 346 | risks associated with Your exercise of permissions under this License. 347 | 348 | 8. Limitation of Liability. 
In no event and under no legal theory, 349 | whether in tort (including negligence), contract, or otherwise, 350 | unless required by applicable law (such as deliberate and grossly 351 | negligent acts) or agreed to in writing, shall any Contributor be 352 | liable to You for damages, including any direct, indirect, special, 353 | incidental, or consequential damages of any character arising as a 354 | result of this License or out of the use or inability to use the 355 | Work (including but not limited to damages for loss of goodwill, 356 | work stoppage, computer failure or malfunction, or any and all 357 | other commercial damages or losses), even if such Contributor 358 | has been advised of the possibility of such damages. 359 | 360 | 9. Accepting Warranty or Additional Liability. While redistributing 361 | the Work or Derivative Works thereof, You may choose to offer, 362 | and charge a fee for, acceptance of support, warranty, indemnity, 363 | or other liability obligations and/or rights consistent with this 364 | License. However, in accepting such obligations, You may act only 365 | on Your own behalf and on Your sole responsibility, not on behalf 366 | of any other Contributor, and only if You agree to indemnify, 367 | defend, and hold each Contributor harmless for any liability 368 | incurred by, or claims asserted against, such Contributor by reason 369 | of your accepting any such warranty or additional liability. 370 | 371 | END OF TERMS AND CONDITIONS 372 | APPENDIX: How to apply the Apache License to your work. 373 | 374 | To apply the Apache License to your work, attach the following 375 | boilerplate notice, with the fields enclosed by brackets "[]" 376 | replaced with your own identifying information. (Don't include 377 | the brackets!) The text should be enclosed in the appropriate 378 | comment syntax for the file format. 
We also recommend that a 379 | file or class name and description of purpose be included on the 380 | same "printed page" as the copyright notice for easier 381 | identification within third-party archives. 382 | 383 | Copyright [yyyy] [name of copyright owner] 384 | 385 | Licensed under the Apache License, Version 2.0 (the "License"); 386 | you may not use this file except in compliance with the License. 387 | You may obtain a copy of the License at 388 | 389 | http://www.apache.org/licenses/LICENSE-2.0 390 | 391 | Unless required by applicable law or agreed to in writing, software 392 | distributed under the License is distributed on an "AS IS" BASIS, 393 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 394 | See the License for the specific language governing permissions and 395 | limitations under the License. 396 | 397 | -------------------------------------------------------------------------------- /cdk/.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | **/node_modules/ 5 | **/.vscode/ 6 | 7 | # CDK asset staging directory 8 | .cdk.staging 9 | cdk.out 10 | -------------------------------------------------------------------------------- /cdk/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /cdk/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to your CDK TypeScript project! 2 | 3 | This is a blank project for TypeScript development with CDK. 4 | 5 | The `cdk.json` file tells the CDK Toolkit how to execute your app. 
6 | 7 | ## Useful commands 8 | 9 | * `npm run build` compile typescript to js 10 | * `npm run watch` watch for changes and compile 11 | * `npm run test` perform the jest unit tests 12 | * `cdk deploy` deploy this stack to your default AWS account/region 13 | * `cdk diff` compare deployed stack with current state 14 | * `cdk synth` emits the synthesized CloudFormation template 15 | -------------------------------------------------------------------------------- /cdk/bin/app.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import 'source-map-support/register'; 3 | import * as cdk from 'aws-cdk-lib'; 4 | import { MLflowVpcStack } from '../lib/mlflow-vpc-stack'; 5 | import { RestApiGatewayStack } from '../lib/rest-api-gateway-stack'; 6 | import { SageMakerStudioUserStack } from '../lib/sagemaker-studio-user-stack'; 7 | import { AmplifyMlflowStack } from '../lib/amplify-mlflow-stack'; 8 | import { AwsSolutionsChecks } from 'cdk-nag' 9 | import { Aspects } from 'aws-cdk-lib'; 10 | import { NagSuppressions } from 'cdk-nag' 11 | 12 | const env = { region: (process.env['AWS_REGION'] || 'us-west-2'), account: process.env['AWS_ACCOUNT'] }; 13 | console.log('#####################################################################') 14 | console.log(`# you are deploying account ${env.account} in region: ${env.region} #`) 15 | console.log('#####################################################################') 16 | 17 | const domainId = (process.env['DOMAIN_ID'] || "" ) 18 | if (domainId == "") { 19 | console.log('no SageMaker domain ID has been provided') 20 | } 21 | else { 22 | console.log(`SageMaker domain ID provided ${domainId}`) 23 | } 24 | const app = new cdk.App(); 25 | 26 | const mlflowVpcStack = new MLflowVpcStack( 27 | app, 28 | 'MLflowVpcStack', 29 | { env: env } 30 | ); 31 | 32 | const restApiGatewayStack = new RestApiGatewayStack( 33 | app, 34 | 'RestApiGatewayStack', 35 | 
mlflowVpcStack.httpApiInternalNLB, 36 | { env: env } 37 | ); 38 | 39 | const sagemakerStudioUserStack = new SageMakerStudioUserStack( 40 | app, 41 | 'SageMakerStudioUserStack', 42 | RestApiGatewayStack.name, 43 | restApiGatewayStack.restApi, 44 | domainId, 45 | mlflowVpcStack.accessLogs, 46 | { env: env } 47 | ) 48 | 49 | const amplifyMlflowStack = new AmplifyMlflowStack( 50 | app, 51 | 'AmplifyMlflowStack', 52 | restApiGatewayStack.restApi, 53 | restApiGatewayStack.userPool, 54 | restApiGatewayStack.identityPool, 55 | restApiGatewayStack.userPoolClient, 56 | sagemakerStudioUserStack.sagemakerStudioDomainId, 57 | { env: env } 58 | ) 59 | 60 | Aspects.of(app).add(new AwsSolutionsChecks({ verbose: true })) 61 | 62 | NagSuppressions.addStackSuppressions(amplifyMlflowStack, [ 63 | { 64 | id: 'AwsSolutions-L1', 65 | reason: 'Do not have control to configure this rule as it is generated by AwsCustomResource see https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.custom_resources.AwsSdkCall.html', 66 | } 67 | ]); -------------------------------------------------------------------------------- /cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/app.ts", 3 | "context": { 4 | "aws-cdk:enableDiffNoFail": "true", 5 | "@aws-cdk/core:stackRelativeExports": "true", 6 | "@aws-cdk/aws-kms:defaultKeyPolicies": true 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /cdk/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | roots: ['/test'], 3 | testMatch: ['**/*.test.ts'], 4 | transform: { 5 | '^.+\\.tsx?$': 'ts-jest' 6 | } 7 | }; 8 | -------------------------------------------------------------------------------- /cdk/lambda/authorizer/index.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017-2019 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file 4 | # except in compliance with the License. A copy of the License is located at 5 | # 6 | # http://aws.amazon.com/apache2.0/ 7 | # 8 | # or in the "license" file accompanying this file. This file is distributed on an "AS IS" 9 | # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | # License for the specific language governing permissions and limitations under the License. 11 | from __future__ import print_function 12 | 13 | import json 14 | import time 15 | import os 16 | import urllib.request 17 | from jose import jwk, jwt 18 | from jose.utils import base64url_decode 19 | import re 20 | import logging 21 | 22 | logging.basicConfig(level=logging.INFO) # change me to DEBUG and redeploy if needed 23 | 24 | REGION = os.environ['REGION'] 25 | APP_CLIENT_ID = os.environ['APP_CLIENT_ID'] 26 | KEYS_URL = os.environ['COGNITO_KEYS_URL'] 27 | BEARER_PREFIX = 'Bearer ' 28 | AJAX_API_PREFIX = '/ajax-api/2.0/mlflow' 29 | # instead of re-downloading the public keys every time 30 | # we download them only on cold start 31 | # https://aws.amazon.com/blogs/compute/container-reuse-in-lambda/ 32 | with urllib.request.urlopen(KEYS_URL) as f: 33 | response = f.read() 34 | keys = json.loads(response.decode('utf-8'))['keys'] 35 | 36 | def verify_token(token): 37 | # get the kid from the headers prior to verification 38 | headers = jwt.get_unverified_headers(token) 39 | kid = headers['kid'] 40 | # search for the kid in the downloaded public keys 41 | key_index = -1 42 | for i in range(len(keys)): 43 | if kid == keys[i]['kid']: 44 | key_index = i 45 | break 46 | if key_index == -1: 47 | logging.info('Public key not found in jwks.json') 48 | return False 49 | # construct the public key 50 | public_key = jwk.construct(keys[key_index]) 51 | # get the last two sections of the token, 52 | # message and signature 
(encoded in base64) 53 | message, encoded_signature = str(token).rsplit('.', 1) 54 | # decode the signature 55 | decoded_signature = base64url_decode(encoded_signature.encode('utf-8')) 56 | # verify the signature 57 | if not public_key.verify(message.encode("utf8"), decoded_signature): 58 | logging.info('Signature verification failed') 59 | return False 60 | 61 | # since we passed the verification, we can now safely 62 | # use the unverified claims 63 | claims = jwt.get_unverified_claims(token) 64 | # additionally we can verify the token expiration 65 | if time.time() > claims['exp']: 66 | logging.info('Token is expired') 67 | return False 68 | # and the Audience (use claims['client_id'] if verifying an access token) 69 | if claims['aud'] != APP_CLIENT_ID: 70 | logging.info('Token was not issued for this audience') 71 | return False 72 | # now we can use the claims: DO NOT PRINT FOR PRODUCTION 73 | # print(claims) 74 | return True 75 | 76 | def handler(event, context): 77 | # the event contains sensitive information. 
Should not be logged 78 | # print(event) 79 | request_type = event['type'] 80 | 81 | if request_type == 'TOKEN': 82 | token = event['authorizationToken'] 83 | elif request_type == 'REQUEST': 84 | token = event['headers']['Authorization'] 85 | else: 86 | raise Exception('Unsuported request type') 87 | 88 | if token.startswith(BEARER_PREFIX): 89 | token = token[len(BEARER_PREFIX):] 90 | 91 | if not verify_token(token): 92 | raise Exception('Unauthorized') 93 | 94 | claims = jwt.get_unverified_claims(token) 95 | principalId=claims['cognito:username'] 96 | 97 | tmp = event['methodArn'].split(':') 98 | apiGatewayArnTmp = tmp[5].split('/') 99 | awsAccountId = tmp[4] 100 | 101 | policy = AuthPolicy(principalId, awsAccountId) 102 | policy.restApiId = apiGatewayArnTmp[0] 103 | policy.region = tmp[3] 104 | policy.stage = apiGatewayArnTmp[1] 105 | 106 | groups = claims['cognito:groups'] 107 | logging.debug(f"cognito group extracted: {groups}") 108 | # Add your custom logic here 109 | # For example, you could depict a strategy based on experiment. However, 110 | # to verify if an individual run, or a model-version, or an artifact belongs to a run, 111 | # you must query the MLFlow api again to cross check, and only then authorize or not the 112 | # request. 
113 |     if 'admins' in groups: 114 |         policy.allowAllMethods() 115 |     elif 'readers' in groups: 116 |         policy.allowMethod(HttpVerb.POST, f"{AJAX_API_PREFIX}/runs/search") 117 |         policy.allowMethod(HttpVerb.POST, f"{AJAX_API_PREFIX}/experiments/search") 118 |         policy.allowMethod(HttpVerb.GET, f"{AJAX_API_PREFIX}/*") 119 |         policy.allowMethod(HttpVerb.GET, f"/get-artifact") 120 |         policy.allowMethod(HttpVerb.GET, f"/model-versions/*") 121 |     elif 'model-approvers' in groups: 122 |         # reader permissions, plus write access to registered models and model versions 123 |         policy.allowMethod(HttpVerb.POST, f"{AJAX_API_PREFIX}/runs/search") 124 |         policy.allowMethod(HttpVerb.POST, f"{AJAX_API_PREFIX}/experiments/search") 125 |         policy.allowMethod(HttpVerb.POST, f"{AJAX_API_PREFIX}/registered-models/*") 126 |         policy.allowMethod(HttpVerb.ALL, f"{AJAX_API_PREFIX}/model-versions/*") 127 |         policy.allowMethod(HttpVerb.GET, f"{AJAX_API_PREFIX}/*") 128 |         policy.allowMethod(HttpVerb.GET, f"/get-artifact") 129 |         policy.allowMethod(HttpVerb.GET, f"/model-versions/*") 130 |     else: 131 |         logging.info('Unknown user group') 132 |         return False 133 | 134 |     # Finally, build the policy 135 |     authResponse = policy.build() 136 | 137 |     # new! -- add additional key-value pairs associated with the authenticated principal 138 |     # these are made available by APIGW like so: $context.authorizer.
139 |     # additional context is cached 140 |     # context = { 141 |     #     'key': 'value', # $context.authorizer.key -> value 142 |     #     'number' : 1, 143 |     #     'bool' : True 144 |     # } 145 |     # context['arr'] = ['foo'] <- this is invalid, APIGW will not accept it 146 |     # context['obj'] = {'foo':'bar'} <- also invalid 147 |     #authResponse['context'] = context 148 | 149 |     # Check policy generated for this request 150 |     logging.debug(f"policy built for this request: {authResponse}") 151 |     return authResponse 152 | 153 | class HttpVerb: 154 |     GET = "GET" 155 |     POST = "POST" 156 |     PUT = "PUT" 157 |     PATCH = "PATCH" 158 |     HEAD = "HEAD" 159 |     DELETE = "DELETE" 160 |     OPTIONS = "OPTIONS" 161 |     ALL = "*" 162 | 163 | class AuthPolicy(object): 164 |     awsAccountId = "" 165 |     """The AWS account id the policy will be generated for. This is used to create the method ARNs.""" 166 |     principalId = "" 167 |     """The principal used for the policy, this should be a unique identifier for the end user.""" 168 |     version = "2012-10-17" 169 |     """The policy version used for the evaluation. This should always be '2012-10-17'""" 170 |     pathRegex = "^[/.a-zA-Z0-9-\*]+$" 171 |     """The regular expression used to validate resource paths for the policy""" 172 | 173 |     """these are the internal lists of allowed and denied methods. These are lists 174 |     of objects and each object has 2 properties: A resource ARN and a nullable 175 |     conditions statement. 176 |     the build method processes these lists and generates the appropriate 177 |     statements for the final policy""" 178 |     allowMethods = [] 179 |     denyMethods = [] 180 | 181 | 182 |     restApiId = "<>" 183 |     """ Replace the placeholder value with a default API Gateway API id to be used in the policy. 184 |     Beware of using '*' since it will not simply mean any API Gateway API id, because stars will greedily expand over '/' or other separators. 185 |     See https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_resource.html for more details.
""" 186 | 187 | region = "<>" 188 | """ Replace the placeholder value with a default region to be used in the policy. 189 | Beware of using '*' since it will not simply mean any region, because stars will greedily expand over '/' or other separators. 190 | See https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_resource.html for more details. """ 191 | 192 | stage = "<>" 193 | """ Replace the placeholder value with a default stage to be used in the policy. 194 | Beware of using '*' since it will not simply mean any stage, because stars will greedily expand over '/' or other separators. 195 | See https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_resource.html for more details. """ 196 | 197 | def __init__(self, principal, awsAccountId): 198 | self.awsAccountId = awsAccountId 199 | self.principalId = principal 200 | self.allowMethods = [] 201 | self.denyMethods = [] 202 | 203 | def _addMethod(self, effect, verb, resource, conditions): 204 | """Adds a method to the internal lists of allowed or denied methods. Each object in 205 | the internal list contains a resource ARN and a condition statement. The condition 206 | statement can be null.""" 207 | if verb != "*" and not hasattr(HttpVerb, verb): 208 | raise NameError("Invalid HTTP verb " + verb + ". Allowed verbs in HttpVerb class") 209 | resourcePattern = re.compile(self.pathRegex) 210 | if not resourcePattern.match(resource): 211 | raise NameError("Invalid resource path: " + resource + ". 
Path should match " + self.pathRegex) 212 | 213 | if resource[:1] == "/": 214 | resource = resource[1:] 215 | 216 | resourceArn = ("arn:aws:execute-api:" + 217 | self.region + ":" + 218 | self.awsAccountId + ":" + 219 | self.restApiId + "/" + 220 | self.stage + "/" + 221 | verb + "/" + 222 | resource) 223 | 224 | if effect.lower() == "allow": 225 | self.allowMethods.append({ 226 | 'resourceArn' : resourceArn, 227 | 'conditions' : conditions 228 | }) 229 | elif effect.lower() == "deny": 230 | self.denyMethods.append({ 231 | 'resourceArn' : resourceArn, 232 | 'conditions' : conditions 233 | }) 234 | 235 | def _getEmptyStatement(self, effect): 236 | """Returns an empty statement object prepopulated with the correct action and the 237 | desired effect.""" 238 | statement = { 239 | 'Action': 'execute-api:Invoke', 240 | 'Effect': effect[:1].upper() + effect[1:].lower(), 241 | 'Resource': [] 242 | } 243 | 244 | return statement 245 | 246 | def _getStatementForEffect(self, effect, methods): 247 | """This function loops over an array of objects containing a resourceArn and 248 | conditions statement and generates the array of statements for the policy.""" 249 | statements = [] 250 | 251 | if len(methods) > 0: 252 | statement = self._getEmptyStatement(effect) 253 | 254 | for curMethod in methods: 255 | if curMethod['conditions'] is None or len(curMethod['conditions']) == 0: 256 | statement['Resource'].append(curMethod['resourceArn']) 257 | else: 258 | conditionalStatement = self._getEmptyStatement(effect) 259 | conditionalStatement['Resource'].append(curMethod['resourceArn']) 260 | conditionalStatement['Condition'] = curMethod['conditions'] 261 | statements.append(conditionalStatement) 262 | 263 | statements.append(statement) 264 | 265 | return statements 266 | 267 | def allowAllMethods(self): 268 | """Adds a '*' allow to the policy to authorize access to all methods of an API""" 269 | self._addMethod("Allow", HttpVerb.ALL, "*", []) 270 | 271 | def denyAllMethods(self): 272 
| """Adds a '*' allow to the policy to deny access to all methods of an API""" 273 | self._addMethod("Deny", HttpVerb.ALL, "*", []) 274 | 275 | def allowMethod(self, verb, resource): 276 | """Adds an API Gateway method (Http verb + Resource path) to the list of allowed 277 | methods for the policy""" 278 | self._addMethod("Allow", verb, resource, []) 279 | 280 | def denyMethod(self, verb, resource): 281 | """Adds an API Gateway method (Http verb + Resource path) to the list of denied 282 | methods for the policy""" 283 | self._addMethod("Deny", verb, resource, []) 284 | 285 | def allowMethodWithConditions(self, verb, resource, conditions): 286 | """Adds an API Gateway method (Http verb + Resource path) to the list of allowed 287 | methods and includes a condition for the policy statement. More on AWS policy 288 | conditions here: http://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements.html#Condition""" 289 | self._addMethod("Allow", verb, resource, conditions) 290 | 291 | def denyMethodWithConditions(self, verb, resource, conditions): 292 | """Adds an API Gateway method (Http verb + Resource path) to the list of denied 293 | methods and includes a condition for the policy statement. More on AWS policy 294 | conditions here: http://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements.html#Condition""" 295 | self._addMethod("Deny", verb, resource, conditions) 296 | 297 | def build(self): 298 | """Generates the policy document based on the internal lists of allowed and denied 299 | conditions. This will generate a policy with two main statements for the effect: 300 | one statement for Allow and one statement for Deny. 
301 | Methods that includes conditions will have their own statement in the policy.""" 302 | if ((self.allowMethods is None or len(self.allowMethods) == 0) and 303 | (self.denyMethods is None or len(self.denyMethods) == 0)): 304 | raise NameError("No statements defined for the policy") 305 | 306 | policy = { 307 | 'principalId' : self.principalId, 308 | 'policyDocument' : { 309 | 'Version' : self.version, 310 | 'Statement' : [] 311 | } 312 | } 313 | 314 | policy['policyDocument']['Statement'].extend(self._getStatementForEffect("Allow", self.allowMethods)) 315 | policy['policyDocument']['Statement'].extend(self._getStatementForEffect("Deny", self.denyMethods)) 316 | 317 | return policy 318 | 319 | # the following is useful to make this script executable in both 320 | # AWS Lambda and any other local environments 321 | if __name__ == '__main__': 322 | # for testing locally you can enter the JWT ID Token here 323 | event = { 324 | 'type': 'TOKEN', 325 | 'authorizationToken': 'Bearer ', 326 | 'methodArn': 'arn:aws:execute-api:::/prod/GET/ajax-api/2.0/preview/mlflow/experiments/list' 327 | } 328 | handler(event, None) 329 | -------------------------------------------------------------------------------- /cdk/lambda/authorizer/requirements.txt: -------------------------------------------------------------------------------- 1 | python-jose -------------------------------------------------------------------------------- /cdk/lib/amplify-mlflow-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from 'aws-cdk-lib'; 2 | import { Construct } from 'constructs'; 3 | 4 | import * as apigateway from "aws-cdk-lib/aws-apigateway"; 5 | import * as amplify from "@aws-cdk/aws-amplify-alpha"; 6 | import * as codebuild from "aws-cdk-lib/aws-codebuild"; 7 | import * as cognito from "aws-cdk-lib/aws-cognito"; 8 | import * as codecommit from "aws-cdk-lib/aws-codecommit"; 9 | import * as ssm from 'aws-cdk-lib/aws-ssm'; 10 | import 
* as cr from 'aws-cdk-lib/custom-resources'; 11 | import * as iam from 'aws-cdk-lib/aws-iam'; 12 | 13 | import { IdentityPool } from '@aws-cdk/aws-cognito-identitypool-alpha'; 14 | import { NagSuppressions } from 'cdk-nag' 15 | 16 | export class AmplifyMlflowStack extends cdk.Stack { 17 | constructor( 18 | scope: Construct, 19 | id: string, 20 | restApiGateway: apigateway.RestApi, 21 | cognitoUserPool: cognito.UserPool, 22 | cognitoIdentityPool: IdentityPool, 23 | cognitoUserPoolClient: cognito.UserPoolClient, 24 | sagemakerStudioDomainId: string, 25 | props?: cdk.StackProps 26 | ) { 27 | super(scope, id, props); 28 | 29 | const repo = new codecommit.Repository(this, 'Repository', { 30 | repositoryName: 'mlflow-2.12.2-patched', 31 | description: 'MLflow v2.12.2 with cognito patch', // optional property 32 | code: codecommit.Code.fromDirectory('../mlflow/mlflow/server/js', 'main') 33 | }); 34 | 35 | const AccessControlAllowOriginHeader: amplify.CustomResponseHeader = { 36 | headers: { 37 | 'Access-Control-Allow-Origin': `https://${sagemakerStudioDomainId}.studio.${this.region}.sagemaker.aws`, 38 | }, 39 | pattern: '*', 40 | }; 41 | 42 | const amplifyApp = new amplify.App(this, 'Mlflow-UI', { 43 | sourceCodeProvider: new amplify.CodeCommitSourceCodeProvider({ repository: repo }), 44 | buildSpec: codebuild.BuildSpec.fromObjectToYaml({ 45 | // Alternatively add a `amplify.yml` to the repo 46 | version: '1.0', 47 | applications: [{ 48 | frontend: { 49 | phases: { 50 | preBuild: { 51 | commands: [ 52 | 'sudo fallocate -l 4G /swapfile', 53 | 'sudo chmod 600 /swapfile', 54 | 'sudo mkswap /swapfile', 55 | 'sudo swapon /swapfile', 56 | 'sudo swapon -s', 57 | 'yarn install' 58 | ], 59 | }, 60 | build: { 61 | commands: [ 62 | 'echo "REACT_APP_REGION=$REACT_APP_REGION" >> .env', 63 | 'echo "REACT_APP_COGNITO_USER_POOL_ID=$REACT_APP_COGNITO_USER_POOL_ID" >> .env', 64 | 'echo "REACT_APP_COGNITO_IDENTITY_POOL_ID=$REACT_APP_COGNITO_IDENTITY_POOL_ID" >> .env', 65 | 'echo 
"REACT_APP_COGNITO_USER_POOL_CLIENT_ID=$REACT_APP_COGNITO_USER_POOL_CLIENT_ID" >> .env', 66 | 'yarn run build' 67 | ], 68 | }, 69 | }, 70 | artifacts: { 71 | baseDirectory: 'build', 72 | files: ['**/*'], 73 | } 74 | }, 75 | }] 76 | }), 77 | environmentVariables: { 78 | '_LIVE_UPDATES': `[{"pkg":"@aws-amplify/cli","type":"npm","version":"9.2.1"}]`, 79 | '_BUILD_TIMEOUT': '60', 80 | 'REACT_APP_REGION': this.region, 81 | 'REACT_APP_COGNITO_USER_POOL_ID': cognitoUserPool.userPoolId, 82 | 'REACT_APP_COGNITO_IDENTITY_POOL_ID': cognitoIdentityPool.identityPoolId, 83 | 'AMPLIFY_USERPOOL_ID': cognitoUserPool.userPoolId, 84 | 'AMPLIFY_IDENTITYPOOL_ID': cognitoIdentityPool.identityPoolId, 85 | 'REACT_APP_COGNITO_USER_POOL_CLIENT_ID': cognitoUserPoolClient.userPoolClientId 86 | }, 87 | customResponseHeaders: [AccessControlAllowOriginHeader] 88 | }) 89 | 90 | amplifyApp.addBranch('main') 91 | 92 | // Rule for static files 93 | amplifyApp.addCustomRule({ 94 | source: '/static-files/<*>', 95 | target: '/<*>', 96 | status: amplify.RedirectStatus.REWRITE 97 | }); 98 | 99 | // Rule for ajax-api 100 | amplifyApp.addCustomRule({ 101 | source: '/ajax-api/<*>', 102 | target: `${restApiGateway.url}ajax-api/<*>`, 103 | status: amplify.RedirectStatus.REWRITE 104 | }) 105 | 106 | // Rule for get-artifact 107 | amplifyApp.addCustomRule({ 108 | source: '/get-artifact', 109 | target: `${restApiGateway.url}get-artifact`, 110 | status: amplify.RedirectStatus.REWRITE 111 | }) 112 | 113 | // Rule for /model-version/get-artifact 114 | amplifyApp.addCustomRule({ 115 | source: '/model-versions/get-artifact', 116 | target: `${restApiGateway.url}model-versions/get-artifact`, 117 | status: amplify.RedirectStatus.REWRITE 118 | }) 119 | 120 | const lambdaBuildTriggerRole = new iam.Role(this, "lambdaAuthorizerRole", { 121 | assumedBy: new iam.ServicePrincipal("lambda.amazonaws.com"), 122 | inlinePolicies: { 123 | cloudWatch: new iam.PolicyDocument({ 124 | statements:[ 125 | new iam.PolicyStatement({ 126 | 
effect: iam.Effect.ALLOW, 127 | resources: [`arn:aws:logs:${this.region}:${this.account}:*`], 128 | actions: [ 129 | "logs:CreateLogGroup", 130 | "logs:CreateLogStream", 131 | "logs:PutLogEvents" 132 | ] 133 | }), 134 | new iam.PolicyStatement({ 135 | effect: iam.Effect.ALLOW, 136 | resources: [`arn:aws:amplify:${this.region}:${this.account}:apps/${amplifyApp.appId}/branches/main/jobs/*`], 137 | actions: ["amplify:StartJob"] 138 | }) 139 | ] 140 | }), 141 | } 142 | }) 143 | 144 | const buildTrigger = new cr.AwsCustomResource(this, 'triggerAppBuild', { 145 | role: lambdaBuildTriggerRole, 146 | onCreate: { 147 | service: 'Amplify', 148 | action: 'startJob', 149 | physicalResourceId: cr.PhysicalResourceId.of('app-build-trigger'), 150 | apiVersion: '9.2.1', 151 | parameters: { 152 | appId: amplifyApp.appId, 153 | branchName: 'main', 154 | jobType: 'RELEASE', 155 | jobReason: 'Auto Start build', 156 | } 157 | }, 158 | }); 159 | 160 | const mlflowUiUrl = new ssm.StringParameter(this, 'mlflowUiUrl', { 161 | parameterName: 'mlflow-uiUrl', 162 | stringValue: `https://main.${amplifyApp.defaultDomain}` 163 | }); 164 | 165 | NagSuppressions.addResourceSuppressions(lambdaBuildTriggerRole, [ 166 | { 167 | id: 'AwsSolutions-IAM5', 168 | reason: 'Permissions needed by the lambda function to trigger the first build job', 169 | } 170 | ]) 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /cdk/lib/mlflow-vpc-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from 'aws-cdk-lib'; 2 | import { Construct } from 'constructs'; 3 | 4 | import * as elbv2 from "aws-cdk-lib/aws-elasticloadbalancingv2"; 5 | import * as ec2 from "aws-cdk-lib/aws-ec2"; 6 | import * as ecs from "aws-cdk-lib/aws-ecs"; 7 | import * as iam from "aws-cdk-lib/aws-iam"; 8 | import * as logs from "aws-cdk-lib/aws-logs"; 9 | import * as servicediscovery from "aws-cdk-lib/aws-servicediscovery"; 10 | import * as 
secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; 11 | import * as s3 from 'aws-cdk-lib/aws-s3'; 12 | import { CfnDBCluster, CfnDBSubnetGroup } from 'aws-cdk-lib/aws-rds'; 13 | 14 | import { NagSuppressions } from 'cdk-nag' 15 | import {Platform} from "aws-cdk-lib/aws-ecr-assets"; 16 | 17 | const { Protocol } = elbv2; 18 | const dbName = "mlflowdb" 19 | const dbPort = 5432 20 | const dbUsername = "master" 21 | const clusterName = "mlflowCluster" 22 | const mlflowServerServiceName = "mlflowServerService" 23 | const mlflowGatewayServiceName = "mlflowGatewayService" 24 | const cidr = "10.0.0.0/16" 25 | const mlflowContainerPort = 5000 26 | const gatewayContainerPort = 5001 27 | const listenerMlflowPort= 8080 28 | const listenerMlflowGatewayPort = 8081 29 | const gatewayPrivateHostname = "mlflow-gateway" 30 | const serverPrivateHostname = "mlflow-server" 31 | const privateHostname = 'api.local' 32 | 33 | export class MLflowVpcStack extends cdk.Stack { 34 | 35 | // Export Vpc, ALB Listener, and Mlflow secret ARN 36 | public readonly httpMlflowServerListener: elbv2.NetworkListener; 37 | public readonly httpMlflowGatewayListener: elbv2.NetworkListener; 38 | public readonly vpc: ec2.Vpc; 39 | public readonly httpApiInternalNLB: elbv2.NetworkLoadBalancer; 40 | public readonly accessLogs: s3.Bucket; 41 | 42 | readonly bucketName = `mlflow-${this.account}-${this.region}` 43 | readonly accesslogBucketName = `accesslogs-${this.account}-${this.region}` 44 | 45 | constructor( 46 | scope: Construct, 47 | id: string, 48 | props?: cdk.StackProps 49 | ) { 50 | super(scope, id, props); 51 | 52 | const logGroup = new logs.LogGroup(this, 'MyVpcLogGroup'); 53 | 54 | const flowLogsRole = new iam.Role(this, 'flowLogsRole', { 55 | assumedBy: new iam.ServicePrincipal('vpc-flow-logs.amazonaws.com') 56 | }); 57 | 58 | // VPC 59 | this.vpc = new ec2.Vpc(this, 'MLFlowVPC', { 60 | ipAddresses: ec2.IpAddresses.cidr(cidr), 61 | natGateways: 1, 62 | maxAzs: 2, 63 | subnetConfiguration: [ 64 | { 
65 | name: 'public', 66 | subnetType: ec2.SubnetType.PUBLIC, 67 | cidrMask: 24, 68 | mapPublicIpOnLaunch: false 69 | }, 70 | { 71 | name: 'private', 72 | subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS, 73 | cidrMask: 26, 74 | }, 75 | { 76 | name: 'isolated', 77 | subnetType: ec2.SubnetType.PRIVATE_ISOLATED, 78 | cidrMask: 28, 79 | }, 80 | ], 81 | }); 82 | 83 | new ec2.FlowLog(this, 'FlowLog', { 84 | resourceType: ec2.FlowLogResourceType.fromVpc(this.vpc), 85 | destination: ec2.FlowLogDestination.toCloudWatchLogs(logGroup, flowLogsRole) 86 | }); 87 | 88 | this.accessLogs = new s3.Bucket(this, "accessLogs", { 89 | versioned: false, 90 | bucketName: this.accesslogBucketName, 91 | publicReadAccess: false, 92 | blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, 93 | removalPolicy: cdk.RemovalPolicy.DESTROY, 94 | autoDeleteObjects: true, 95 | encryption: s3.BucketEncryption.KMS_MANAGED, 96 | enforceSSL: true, 97 | objectOwnership: s3.ObjectOwnership.BUCKET_OWNER_PREFERRED 98 | }) 99 | 100 | // mlflow S3 bucket 101 | const mlFlowBucket = new s3.Bucket(this, "mlFlowBucket", { 102 | versioned: false, 103 | bucketName: this.bucketName, 104 | publicReadAccess: false, 105 | blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, 106 | removalPolicy: cdk.RemovalPolicy.DESTROY, 107 | autoDeleteObjects: true, 108 | encryption: s3.BucketEncryption.KMS_MANAGED, 109 | enforceSSL: true, 110 | serverAccessLogsBucket: this.accessLogs, 111 | serverAccessLogsPrefix: 'mlflow-server' 112 | }) 113 | 114 | // DB SubnetGroup 115 | const subnetIds: string[] = []; 116 | this.vpc.isolatedSubnets.forEach((subnet, index) => { 117 | subnetIds.push(subnet.subnetId); 118 | }); 119 | 120 | const dbSubnetGroup: CfnDBSubnetGroup = new CfnDBSubnetGroup(this, 'AuroraSubnetGroup', { 121 | dbSubnetGroupDescription: 'Subnet group to access aurora', 122 | dbSubnetGroupName: 'aurora-serverless-subnet-group', 123 | subnetIds 124 | }); 125 | 126 | // DB Credentials 127 | const databaseCredentialsSecret = new 
secretsmanager.Secret(this, 'DBCredentialsSecret', { 128 | secretName: `mlflow-database-credentials`, 129 | generateSecretString: { 130 | secretStringTemplate: JSON.stringify({ 131 | username: dbUsername, 132 | }), 133 | excludePunctuation: true, 134 | includeSpace: false, 135 | generateStringKey: 'password' 136 | } 137 | }); 138 | 139 | // DB SecurityGroup 140 | const dbClusterSecurityGroup = new ec2.SecurityGroup(this, 'DBClusterSecurityGroup', 141 | { 142 | vpc: this.vpc, 143 | allowAllOutbound: false 144 | } 145 | ); 146 | 147 | dbClusterSecurityGroup.addIngressRule(ec2.Peer.ipv4(cidr), ec2.Port.tcp(dbPort)); 148 | 149 | const dbConfig = { 150 | dbClusterIdentifier: `${mlflowServerServiceName}-cluster`, 151 | engineMode: 'serverless', 152 | engine: 'aurora-postgresql', 153 | engineVersion: '13.12', 154 | databaseName: dbName, 155 | deletionProtection: false, 156 | masterUsername: databaseCredentialsSecret.secretValueFromJson('username').toString(), 157 | masterUserPassword: databaseCredentialsSecret.secretValueFromJson('password').toString(), 158 | dbSubnetGroupName: dbSubnetGroup.dbSubnetGroupName, 159 | scalingConfiguration: { 160 | autoPause: true, 161 | maxCapacity: 2, 162 | minCapacity: 2, 163 | secondsUntilAutoPause: 3600, 164 | }, 165 | vpcSecurityGroupIds: [ 166 | dbClusterSecurityGroup.securityGroupId 167 | ], 168 | storageEncrypted: true, 169 | removalPolicy: cdk.RemovalPolicy.DESTROY // Delete everything 170 | }; 171 | 172 | // 👇 RDS Cluster 173 | const rdsCluster = new CfnDBCluster(this, 'DBCluster', dbConfig); 174 | rdsCluster.addDependency(dbSubnetGroup) 175 | 176 | // 👇 ECS Cluster 177 | const cluster = new ecs.Cluster(this, "MLflowCluster", { 178 | vpc: this.vpc, 179 | clusterName: clusterName, 180 | containerInsights: true 181 | }); 182 | 183 | // Network Load Balancer 184 | this.httpApiInternalNLB = new elbv2.NetworkLoadBalancer( 185 | this, 186 | "httpapiInternalALB", 187 | { 188 | vpc: this.vpc, 189 | internetFacing: false, 190 | } 191 | ); 
192 | 193 | // Security Group 194 | const mlflowSecGrp = new ec2.SecurityGroup( 195 | this, 196 | "mlflowServiceSecurityGroup", 197 | { 198 | vpc: this.vpc, 199 | } 200 | ); 201 | 202 | mlflowSecGrp.addIngressRule(ec2.Peer.ipv4(cidr), ec2.Port.tcp(mlflowContainerPort), 'Allow internal access to the mlflow server port'); 203 | mlflowSecGrp.addIngressRule(ec2.Peer.ipv4(cidr), ec2.Port.tcp(gatewayContainerPort), 'Allow internal access to the mlflow gateway port'); 204 | mlflowSecGrp.addIngressRule(ec2.Peer.ipv4(cidr), ec2.Port.tcp(listenerMlflowPort), 'Allow internal access to the container port'); 205 | mlflowSecGrp.addIngressRule(ec2.Peer.ipv4(cidr), ec2.Port.tcp(listenerMlflowGatewayPort), 'Allow internal access to the container port'); 206 | 207 | 208 | // 👇 Cloud Map Namespace 209 | const dnsNamespace = new servicediscovery.PrivateDnsNamespace( 210 | this, 211 | "DnsNamespace", 212 | { 213 | name: privateHostname, 214 | vpc: this.vpc, 215 | description: "Private DnsNamespace for Microservices", 216 | } 217 | ); 218 | 219 | const withoutPolicyUpdatesOptions: iam.WithoutPolicyUpdatesOptions = { 220 | addGrantsToResources: false, 221 | }; 222 | 223 | // 👇 Fargate Task Role 224 | const mlflowServertaskrole = new iam.Role(this, "ecsTaskExecutionRole", { 225 | assumedBy: new iam.ServicePrincipal("ecs-tasks.amazonaws.com"), 226 | managedPolicies: [ 227 | iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AmazonECSTaskExecutionRolePolicy") 228 | ], 229 | inlinePolicies: { 230 | s3Bucket: new iam.PolicyDocument({ 231 | statements:[ 232 | new iam.PolicyStatement({ 233 | effect: iam.Effect.ALLOW, 234 | resources: [ 235 | `arn:aws:s3:::${this.bucketName}`, 236 | `arn:aws:s3:::${this.bucketName}/*` 237 | ], 238 | actions: [ 239 | "s3:ListBucket", 240 | "s3:GetObject", 241 | "s3:PutObject", 242 | "s3:DeleteObject", 243 | "s3:PutObjectTagging", 244 | "s3:DeleteObjectTagging", 245 | "s3:GetBucketTagging", 246 | "s3:GetObjectTagging" 247 | ] 248 | }) 249 | ] 250 | }), 251 | 
secretsManagerRestricted: new iam.PolicyDocument({ 252 |           statements: [ 253 |             new iam.PolicyStatement({ 254 |               effect: iam.Effect.ALLOW, 255 |               resources: [ 256 |                 databaseCredentialsSecret.secretArn 257 |               ], 258 |               actions: [ 259 |                 "secretsmanager:GetResourcePolicy", 260 |                 "secretsmanager:GetSecretValue", 261 |                 "secretsmanager:DescribeSecret", 262 |                 "secretsmanager:ListSecretVersionIds" 263 |               ] 264 |             }), 265 |           ] 266 |         }) 267 |       } 268 |     }); 269 | 270 |     const mlflowGatewaytaskrole = new iam.Role(this, "mlflowGatewaytaskrole", { 271 |       assumedBy: new iam.ServicePrincipal("ecs-tasks.amazonaws.com"), 272 |       managedPolicies: [ 273 |         iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AmazonECSTaskExecutionRolePolicy") 274 |       ], 275 |       inlinePolicies: { 276 |         secretsManagerRestricted: new iam.PolicyDocument({ 277 |           statements: [ 278 |             new iam.PolicyStatement({ 279 |               effect: iam.Effect.ALLOW, 280 |               resources: [ 281 |                 "*" //TODO: Add ARNs of API keys 282 |               ], 283 |               actions: [ 284 |                 "secretsmanager:GetResourcePolicy", 285 |                 "secretsmanager:GetSecretValue", 286 |                 "secretsmanager:DescribeSecret", 287 |                 "secretsmanager:ListSecretVersionIds" 288 |               ] 289 |             }), 290 |           ] 291 |         }), 292 |         bedrock: new iam.PolicyDocument({ 293 |           statements: [ 294 |             new iam.PolicyStatement({ 295 |               effect: iam.Effect.ALLOW, 296 |               resources: [ 297 |                 "*" 298 |               ], 299 |               actions: [ 300 |                 "bedrock:InvokeModel", 301 |                 "bedrock:InvokeModelWithResponseStream" 302 |               ] 303 |             }), 304 |           ] 305 |         }) 306 |       } 307 |     }); 308 | 309 |     // MLflow server Task Definitions 310 |     const mlflowServerTaskDefinition = new ecs.FargateTaskDefinition( 311 |       this, 312 |       "mlflowServerTaskDef", 313 |       { 314 |         taskRole: mlflowServertaskrole.withoutPolicyUpdates(withoutPolicyUpdatesOptions), 315 |         executionRole: mlflowServertaskrole.withoutPolicyUpdates(withoutPolicyUpdatesOptions), 316 |         family: "MlflowServerStack", 317 |         cpu: 512, 318 |         memoryLimitMiB: 1024 319 |       }, 320 |     ); 321 | 322 |     // MLflow gateway Task Definition 323 |     const
mlflowGatewayTaskDefinition = new ecs.FargateTaskDefinition( 324 | this, 325 | "mlflowGatewayTaskDef", 326 | { 327 | taskRole: mlflowGatewaytaskrole.withoutPolicyUpdates(withoutPolicyUpdatesOptions), 328 | executionRole: mlflowGatewaytaskrole.withoutPolicyUpdates(withoutPolicyUpdatesOptions), 329 | family: "MlflowGatewayStack", 330 | cpu: 512, 331 | memoryLimitMiB: 1024 332 | }, 333 | ); 334 | 335 | // 👇 Log Groups 336 | const mlflowServerServiceLogGroup = new logs.LogGroup(this, "mlflowServiceLogGroup", { 337 | logGroupName: "/ecs/mlflowServerService", 338 | removalPolicy: cdk.RemovalPolicy.DESTROY, 339 | }); 340 | 341 | // 👇 Log Groups 342 | const mlflowGatewayServiceLogGroup = new logs.LogGroup(this, "mlflowGatewayLogGroup", { 343 | logGroupName: "/ecs/mlflowGatewayService", 344 | removalPolicy: cdk.RemovalPolicy.DESTROY, 345 | }); 346 | 347 | const mlflowServerServiceLogDriver = new ecs.AwsLogDriver({ 348 | logGroup: mlflowServerServiceLogGroup, 349 | streamPrefix: "mlflowServerService", 350 | }); 351 | 352 | const mlflowGatewayServiceLogDriver = new ecs.AwsLogDriver({ 353 | logGroup: mlflowGatewayServiceLogGroup, 354 | streamPrefix: "mlflowGatewayService", 355 | }); 356 | 357 | // MlFlow Task Container 358 | const mlflowServerServiceContainer = mlflowServerTaskDefinition.addContainer( 359 | "mlflowServerContainer", 360 | { 361 | containerName: "mlflowServerContainer", 362 | essential: true, 363 | memoryReservationMiB: 1024, 364 | cpu: 512, 365 | portMappings: [{ 366 | containerPort: mlflowContainerPort, 367 | protocol: ecs.Protocol.TCP, 368 | }], 369 | image: ecs.ContainerImage.fromAsset('../src/mlflow-server', { 370 | platform: Platform.LINUX_AMD64, 371 | buildArgs: { 372 | PORT: `${mlflowContainerPort}` 373 | } 374 | }), 375 | environment: { 376 | 'PORT': `${mlflowContainerPort}`, 377 | 'BUCKET': `s3://${mlFlowBucket.bucketName}`, 378 | 'DBHOST': rdsCluster.attrEndpointAddress, 379 | 'DBPORT': `${dbPort}`, 380 | 'DATABASE': dbName, 381 | 
'MLFLOW_DEPLOYMENTS_TARGET': `http://${gatewayPrivateHostname}.${privateHostname}:${gatewayContainerPort}` 382 | }, 383 | secrets: { 384 | USERNAME: ecs.Secret.fromSecretsManager(databaseCredentialsSecret, 'username'), 385 | PASSWORD: ecs.Secret.fromSecretsManager(databaseCredentialsSecret, 'password') 386 | }, 387 | logging: mlflowServerServiceLogDriver, 388 | }); 389 | 390 | // MlFlow Task Container 391 | const mlflowGatewayServiceContainer = mlflowGatewayTaskDefinition.addContainer( 392 | "mlflowGatewayContainer", 393 | { 394 | containerName: "mlflowGatewayContainer", 395 | essential: true, 396 | memoryReservationMiB: 1024, 397 | cpu: 512, 398 | portMappings: [{ 399 | containerPort: gatewayContainerPort, 400 | protocol: ecs.Protocol.TCP, 401 | }], 402 | image: ecs.ContainerImage.fromAsset('../src/mlflow-gateway', { 403 | platform: Platform.LINUX_AMD64, 404 | buildArgs: { 405 | PORT: `${gatewayContainerPort}` 406 | } 407 | }), 408 | environment: { 409 | 'PORT': `${gatewayContainerPort}`, 410 | 'WORKERS': '5', 411 | 'AWS_REGION': this.region, 412 | 'AWS_DEFAULT_REGION': this.region 413 | }, 414 | secrets: { 415 | }, 416 | logging: mlflowGatewayServiceLogDriver, 417 | }); 418 | 419 | // MLflow server Services 420 | const mlflowServerService = new ecs.FargateService(this, "mlflowServerService", { 421 | cluster: cluster, 422 | serviceName: mlflowServerServiceName, 423 | taskDefinition: mlflowServerTaskDefinition, 424 | assignPublicIp: false, 425 | desiredCount: 2, 426 | securityGroups: [mlflowSecGrp], 427 | cloudMapOptions: { 428 | name: serverPrivateHostname, 429 | cloudMapNamespace: dnsNamespace, 430 | }, 431 | }); 432 | 433 | // MLflow gateway Services 434 | const mlflowGatewayService = new ecs.FargateService(this, "mlflowGatewayService", { 435 | cluster: cluster, 436 | serviceName: mlflowGatewayServiceName, 437 | taskDefinition: mlflowGatewayTaskDefinition, 438 | assignPublicIp: false, 439 | desiredCount: 2, 440 | securityGroups: [mlflowSecGrp], 441 | 
cloudMapOptions: { 442 | name: gatewayPrivateHostname, 443 | cloudMapNamespace: dnsNamespace, 444 | }, 445 | }); 446 | 447 | // NLB MLflow server Listener 448 | this.httpMlflowServerListener = this.httpApiInternalNLB.addListener("httpMlflowServerListener", { 449 | port: listenerMlflowPort, 450 | protocol: Protocol.TCP 451 | }); 452 | 453 | // NLB MLflow Gateway Listener 454 | this.httpMlflowGatewayListener = this.httpApiInternalNLB.addListener("httpMlflowGatewayListener", { 455 | port: listenerMlflowGatewayPort, 456 | protocol: Protocol.TCP 457 | }); 458 | 459 | // MLflow server Target Groups 460 | const mlflowServiceTargetGroup = this.httpMlflowServerListener.addTargets( 461 | "mlflowServiceTargetGroup", 462 | { 463 | targets: [ 464 | mlflowServerService.loadBalancerTarget( 465 | { 466 | containerName: 'mlflowServerContainer', 467 | containerPort: mlflowContainerPort 468 | } 469 | ) 470 | ], 471 | port: listenerMlflowPort, 472 | } 473 | ); 474 | 475 | // MLflow gateway Target Groups 476 | const mlflowGatewayTargetGroup = this.httpMlflowGatewayListener.addTargets( 477 | "mlflowGatewayTargetGroup", 478 | { 479 | targets: [ 480 | mlflowGatewayService.loadBalancerTarget( 481 | { 482 | containerName: 'mlflowGatewayContainer', 483 | containerPort: gatewayContainerPort 484 | } 485 | ) 486 | ], 487 | port: listenerMlflowGatewayPort, 488 | } 489 | ); 490 | // MLflow server Task Auto Scaling 491 | const mlflowServerAutoScaling = mlflowServerService.autoScaleTaskCount({ maxCapacity: 6 }); 492 | mlflowServerAutoScaling.scaleOnCpuUtilization('MlflowServerCpuScaling', { 493 | targetUtilizationPercent: 70, 494 | scaleInCooldown: cdk.Duration.seconds(60), 495 | scaleOutCooldown: cdk.Duration.seconds(60), 496 | }); 497 | 498 | // MLflow gateway Task Auto Scaling 499 | const mlflowGatewayAutoScaling = mlflowGatewayService.autoScaleTaskCount({ maxCapacity: 6 }); 500 | mlflowGatewayAutoScaling.scaleOnCpuUtilization('MlflowGatewayCpuScaling', { 501 | targetUtilizationPercent: 70, 502 
| scaleInCooldown: cdk.Duration.seconds(60), 503 | scaleOutCooldown: cdk.Duration.seconds(60), 504 | }); 505 | 506 | NagSuppressions.addResourceSuppressions(mlflowServerTaskDefinition, [ 507 | { 508 | id: 'AwsSolutions-ECS2', 509 | reason: 'ENV variables passed do not contain secrets' 510 | }, 511 | ]) 512 | 513 | NagSuppressions.addResourceSuppressions(mlflowGatewayTaskDefinition, [ 514 | { 515 | id: 'AwsSolutions-ECS2', 516 | reason: 'ENV variables passed do not contain secrets' 517 | }, 518 | ]) 519 | 520 | NagSuppressions.addResourceSuppressions(mlflowServertaskrole, [ 521 | { 522 | id: 'AwsSolutions-IAM5', 523 | reason: 'The task owns this bucket and it should have full permissions on the objects', 524 | appliesTo: [`Resource::arn:aws:s3:::${this.bucketName}/*`] 525 | }, 526 | { 527 | id: 'AwsSolutions-IAM4', 528 | reason: 'The task needs access to this managed policy', 529 | appliesTo: ['Policy::arn::iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy'] 530 | } 531 | ] 532 | ) 533 | 534 | NagSuppressions.addResourceSuppressions(mlflowGatewaytaskrole, [ 535 | { 536 | id: 'AwsSolutions-IAM5', 537 | reason: 'The task owns this bucket and it should have full permissions on the objects', 538 | appliesTo: [`Resource::*`] 539 | }, 540 | { 541 | id: 'AwsSolutions-IAM4', 542 | reason: 'The task needs access to this managed policy', 543 | appliesTo: ['Policy::arn::iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy'] 544 | } 545 | ] 546 | ) 547 | 548 | NagSuppressions.addResourceSuppressions(databaseCredentialsSecret, [ 549 | { 550 | id: 'AwsSolutions-SMG4', 551 | reason: 'MLflow does not support database credentials rotation' 552 | } 553 | ]) 554 | 555 | NagSuppressions.addResourceSuppressions(this.accessLogs, [ 556 | { 557 | id: 'AwsSolutions-S1', 558 | reason: 'This is a already an access log bucket' 559 | } 560 | ]) 561 | 562 | NagSuppressions.addResourceSuppressions(rdsCluster, [ 563 | { 564 | id: 'AwsSolutions-RDS11', 565 | reason: 'We want to 
// rest-api-gateway-stack.ts
//
// Publishes the internal (VPC-only) MLflow Network Load Balancer through an
// Amazon API Gateway REST API so it can be reached from outside the VPC.
// Three HTTP_PROXY integration families are wired up, all over one VPC Link:
//   - root proxy {proxy+}            -> NLB port 80   (Cognito-backed Lambda authorizer)
//   - /api/{proxy+}                  -> NLB port 8080 (IAM / SigV4 auth)
//   - /endpoints/{proxy+} and
//     /api/2.0/endpoints/{proxy+}    -> NLB port 8081 (IAM / SigV4 auth)
// Which backend sits behind each NLB port is defined in the VPC stack —
// presumably the MLflow tracking server on 8080 and the MLflow AI Gateway on
// 8081; confirm against mlflow-vpc-stack.ts.
import * as cdk from 'aws-cdk-lib';
import { Construct } from 'constructs';

import * as elbv2 from "aws-cdk-lib/aws-elasticloadbalancingv2";
import * as apigateway from "aws-cdk-lib/aws-apigateway";
import { PassthroughBehavior } from "aws-cdk-lib/aws-apigateway"
import * as cognito from 'aws-cdk-lib/aws-cognito';
import * as lambda from 'aws-cdk-lib/aws-lambda';
import * as logs from "aws-cdk-lib/aws-logs";
import * as lambdapython from '@aws-cdk/aws-lambda-python-alpha';
import * as ssm from 'aws-cdk-lib/aws-ssm';
import * as iam from 'aws-cdk-lib/aws-iam';
import { IdentityPool, UserPoolAuthenticationProvider } from '@aws-cdk/aws-cognito-identitypool-alpha';

import { NagSuppressions } from 'cdk-nag'

export class RestApiGatewayStack extends cdk.Stack {
  // Exposed so sibling stacks (Amplify UI, SageMaker Studio users) can
  // reference the API and the Cognito resources.
  public readonly restApi: apigateway.RestApi;
  public readonly userPool: cognito.UserPool;
  public readonly userPoolClient: cognito.UserPoolClient
  public readonly identityPool: IdentityPool;

  constructor(
    scope: Construct,
    id: string,
    httpApiInternalNLB: elbv2.NetworkLoadBalancer, // internal NLB created by the VPC stack
    props?: cdk.StackProps
  ) {
    super(scope, id, props);

    // VPC Link: lets the Regional (public) REST API forward requests to the
    // internal NLB inside the VPC.
    const link = new apigateway.VpcLink(this, 'link', {
      targets: [httpApiInternalNLB],
    });

    // Cognito User Pool used to authenticate human MLflow users.
    // Email is the sign-in alias; autoVerify.email is false, so verification
    // is expected to happen out of band (e.g. via the admin scripts in
    // src/cognito/ — confirm).
    this.userPool = new cognito.UserPool(this, 'userpool', {
      userPoolName: 'mlflow-user-pool',
      selfSignUpEnabled: true,
      signInAliases: {
        email: true,
      },
      autoVerify: {
        email: false,
      },
      passwordPolicy: {
        minLength: 8,
        requireLowercase: true,
        requireDigits: true,
        requireUppercase: true,
        requireSymbols: true,
      },
      accountRecovery: cognito.AccountRecovery.EMAIL_ONLY,
      removalPolicy: cdk.RemovalPolicy.DESTROY, // sample stack: delete the pool with the stack
    });

    // L1 escape hatch: advanced security mode is not exposed on the L2
    // UserPool construct, so set it on the underlying CfnUserPool.
    const cfnUserPool = this.userPool.node.defaultChild as cognito.CfnUserPool;
    cfnUserPool.userPoolAddOns = { advancedSecurityMode: "ENFORCED" };

    // Identity Pool federated to the User Pool, so authenticated users can be
    // vended temporary AWS credentials (used for the IAM-authorized API paths).
    this.identityPool = new IdentityPool(this, 'mlflow-identity-pool', {
      identityPoolName: 'mlflow-identity-pool',
      authenticationProviders: {
        userPools: [new UserPoolAuthenticationProvider({ userPool: this.userPool })],
      },
    });

    // App client the web UI authenticates against (Cognito-hosted identities only).
    this.userPoolClient = this.userPool.addClient('mlflow-app-client', {
      supportedIdentityProviders: [
        cognito.UserPoolClientIdentityProvider.COGNITO,
      ],
    });

    // Catch-all integration for the API root: forwards to the NLB on its
    // default listener (port 80, no explicit port in the URI).
    const defaultProxyApiIntegration = new apigateway.Integration(
      {
        type: apigateway.IntegrationType.HTTP_PROXY,
        integrationHttpMethod: 'ANY',
        options: {
          connectionType: apigateway.ConnectionType.VPC_LINK,
          vpcLink: link,
          passthroughBehavior: PassthroughBehavior.WHEN_NO_TEMPLATES,
        },
        uri: `http://${httpApiInternalNLB.loadBalancerDnsName}/`
      }
    );

    // Access-log destination for the API stage.
    const logGroup = new logs.LogGroup(this, 'MLflowRestApiAccessLogs', {
      retention: 30, // Keep logs for 30 days (RetentionDays.ONE_MONTH === 30)
    });

    // The REST API itself. Default method auth is IAM; specific resources
    // below override this with the custom Lambda authorizer.
    this.restApi = new apigateway.RestApi(this, 'mlflow-rest-api',
      {
        defaultIntegration: defaultProxyApiIntegration,
        defaultMethodOptions: {
          methodResponses: [{
            statusCode: "200"
          }],
          authorizationType: apigateway.AuthorizationType.IAM
        },
        deployOptions: {
          accessLogDestination: new apigateway.LogGroupLogDestination(logGroup),
          accessLogFormat: apigateway.AccessLogFormat.jsonWithStandardFields(),
          loggingLevel: apigateway.MethodLoggingLevel.INFO,
          dataTraceEnabled: true // NOTE(review): data tracing logs full request/response bodies; consider disabling outside of samples
        },
        cloudWatchRole: true
      }
    )

    // Execution role for the Lambda authorizer — scoped to CloudWatch Logs only.
    const lambdaAuthorizerRole = new iam.Role(this, "lambdaAuthorizerRole", {
      assumedBy: new iam.ServicePrincipal("lambda.amazonaws.com"),
      inlinePolicies: {
        cloudWatch: new iam.PolicyDocument({
          statements:[
            new iam.PolicyStatement({
              effect: iam.Effect.ALLOW,
              resources: [`arn:aws:logs:${this.region}:${this.account}:*`],
              actions: [
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents"
              ]
            })
          ]
        }),
      }
    })

    // Authorizer implementation (lambda/authorizer/index.py). The environment
    // points it at the user pool's JWKS URL and app-client id, so it
    // presumably validates Cognito-issued JWTs from the Authorization header —
    // confirm against index.py.
    const lambdaFunction = new lambdapython.PythonFunction(this, 'MyFunction', {
      entry: './lambda/authorizer/', // required
      runtime: lambda.Runtime.PYTHON_3_9, // required
      index: 'index.py', // optional, defaults to 'index.py'
      handler: 'handler', // optional, defaults to 'handler',
      reservedConcurrentExecutions: 100, // change as you see it fit
      role: lambdaAuthorizerRole,
      environment: {
        REGION: this.region,
        ACCOUNT: this.account,
        COGNITO_USER_POOL_ID: this.userPool.userPoolId,
        REST_API_ID: this.restApi.restApiId,
        COGNITO_KEYS_URL: `https://cognito-idp.${this.region}.amazonaws.com/${this.userPool.userPoolId}/.well-known/jwks.json`,
        APP_CLIENT_ID: this.userPoolClient.userPoolClientId
      },
    });

    // REQUEST-type authorizer keyed on the Authorization header; caching is
    // disabled so every request is re-validated.
    const lambdaAuthorizer = new apigateway.RequestAuthorizer(this, 'lambda-authorizer', {
      handler: lambdaFunction,
      identitySources: [apigateway.IdentitySource.header('Authorization')],
      resultsCacheTtl: cdk.Duration.seconds(0) // Increase as you see it fit
    });

    // Greedy {proxy+} integration to NLB port 8080, preserving the request path.
    const proxyApiIntegration = new apigateway.Integration(
      {
        type: apigateway.IntegrationType.HTTP_PROXY,
        integrationHttpMethod: 'ANY',
        options: {
          connectionType: apigateway.ConnectionType.VPC_LINK,
          vpcLink: link,
          requestParameters: {
            'integration.request.path.proxy': 'method.request.path.proxy'
          },
          passthroughBehavior: PassthroughBehavior.WHEN_NO_TEMPLATES
        },
        uri: `http://${httpApiInternalNLB.loadBalancerDnsName}:8080/{proxy}`
      }
    );

    // Root {proxy+}: the browser-facing path, protected by the Lambda
    // authorizer (CUSTOM auth) instead of IAM.
    const rootProxy = this.restApi.root.addProxy({
      defaultIntegration: proxyApiIntegration,
      defaultMethodOptions: {
        requestParameters: {
          'method.request.path.proxy': true
        },
        authorizer: lambdaAuthorizer,
        authorizationType: apigateway.AuthorizationType.CUSTOM,
      },
      // "false" will require explicitly adding methods on the `proxy` resource
      anyMethod: true // "true" is the default
    });

    // /api/{proxy+} integration: programmatic MLflow REST calls, port 8080.
    const apiIntegration = new apigateway.Integration(
      {
        type: apigateway.IntegrationType.HTTP_PROXY,
        integrationHttpMethod: 'ANY',
        options: {
          connectionType: apigateway.ConnectionType.VPC_LINK,
          vpcLink: link,
          requestParameters: {
            'integration.request.path.proxy': 'method.request.path.proxy'
          },
          passthroughBehavior: PassthroughBehavior.WHEN_NO_TEMPLATES
        },
        uri: `http://${httpApiInternalNLB.loadBalancerDnsName}:8080/api/{proxy}`
      });

    // /api/
    const apiResource = this.restApi.root.addResource('api')

    // /api/{proxy+} — IAM-authorized (SigV4), used by SDK/CLI clients.
    apiResource.addProxy({
      defaultIntegration: apiIntegration,
      defaultMethodOptions: {
        requestParameters: {
          'method.request.path.proxy': true
        },
        authorizationType: apigateway.AuthorizationType.IAM,
      },
      // "false" will require explicitly adding methods on the `proxy` resource
      anyMethod: true // "true" is the default
    });

    // /api/2.0/endpoints/{proxy+} integration: AI Gateway endpoints API, port 8081.
    const routesApiGatewayIntegration = new apigateway.Integration(
      {
        type: apigateway.IntegrationType.HTTP_PROXY,
        integrationHttpMethod: 'ANY',
        options: {
          connectionType: apigateway.ConnectionType.VPC_LINK,
          vpcLink: link,
          requestParameters: {
            'integration.request.path.proxy': 'method.request.path.proxy'
          },
          passthroughBehavior: PassthroughBehavior.WHEN_NO_TEMPLATES
        },
        uri: `http://${httpApiInternalNLB.loadBalancerDnsName}:8081/api/2.0/endpoints/{proxy}`
      });

    // NOTE(review): "Resourse" is a typo for "Resource"; renaming the local is
    // safe but left as-is here to keep this change documentation-only.
    const routesApiGatewayResourse = apiResource.addResource('2.0').addResource('endpoints')

    // Integration for the bare collection path /api/2.0/endpoints/ (no {proxy}).
    const routesResourceIntegration = new apigateway.Integration(
      {
        type: apigateway.IntegrationType.HTTP_PROXY,
        integrationHttpMethod: 'ANY',
        options: {
          connectionType: apigateway.ConnectionType.VPC_LINK,
          vpcLink: link,
          passthroughBehavior: PassthroughBehavior.WHEN_NO_TEMPLATES
        },
        uri: `http://${httpApiInternalNLB.loadBalancerDnsName}:8081/api/2.0/endpoints/`
      });

    // // /api/2.0/endpoints/
    routesApiGatewayResourse.addMethod(
      'ANY',
      routesResourceIntegration
    )

    // /api/2.0/endpoints/{proxy+} — IAM-authorized.
    routesApiGatewayResourse.addProxy({
      defaultIntegration: routesApiGatewayIntegration,
      defaultMethodOptions: {
        requestParameters: {
          'method.request.path.proxy': true
        },
        authorizationType: apigateway.AuthorizationType.IAM,
      },
      // "false" will require explicitly adding methods on the `proxy` resource
      anyMethod: true // "true" is the default
    });

    // /endpoints/{proxy+}
    const gatewayIntegration = new apigateway.Integration(
      {
        type: apigateway.IntegrationType.HTTP_PROXY,
        integrationHttpMethod: 'ANY',
        options: {
          connectionType: apigateway.ConnectionType.VPC_LINK,
          vpcLink: link,
          requestParameters: {
            'integration.request.path.proxy': 'method.request.path.proxy'
          },
          passthroughBehavior: PassthroughBehavior.WHEN_NO_TEMPLATES
        },
        uri: `http://${httpApiInternalNLB.loadBalancerDnsName}:8081/endpoints/{proxy}`
      });

    const gatewayResource = this.restApi.root.addResource('endpoints')

    // /endpoints/{proxy+} — IAM-authorized.
    gatewayResource.addProxy({
      defaultIntegration: gatewayIntegration,
      defaultMethodOptions: {
        requestParameters: {
          'method.request.path.proxy': true
        },
        authorizationType: apigateway.AuthorizationType.IAM,
      },
      // "false" will require explicitly adding methods on the `proxy` resource
      anyMethod: true // "true" is the default
    });

    // SSM parameters so the lab notebooks can discover the API at runtime.
    const mlflowRestApiId = new ssm.StringParameter(this, 'mlflowRestApiId', {
      parameterName: 'mlflow-restApiId',
      stringValue: this.restApi.restApiId,
    });

    const mlflowRestApiUrl = new ssm.StringParameter(this, 'mlflowRestApiUrl', {
      parameterName: 'mlflow-restApiUrl',
      stringValue: this.restApi.url,
    });

    // cdk-nag suppressions with rationales for this sample deployment.
    NagSuppressions.addResourceSuppressions(this.userPool, [
      {
        id: 'AwsSolutions-COG2',
        reason: 'MFA not necessary for this sample'
      }
    ]
    )

    NagSuppressions.addResourceSuppressions(this.restApi, [
      {
        id: 'AwsSolutions-APIG2',
        reason: 'Request validation is done at a deeper level by the MLflow server'
      },
      {
        id: 'AwsSolutions-IAM4',
        reason: 'CloudWatch policy automatically generated',
        appliesTo: ['Policy::arn::iam::aws:policy/service-role/AmazonAPIGatewayPushToCloudWatchLogs']
      },
      {
        // NOTE(review): the reason string below has an unclosed parenthesis /
        // missing ")" before "or IAM_AUTH" — cosmetic only, but worth fixing.
        id: 'AwsSolutions-COG4',
        reason: 'The Proxy resource uses either a lambda authorizer (that validates the token with the Cognito User Pool or IAM_AUTH'
      }
    ],
    true // apply to child constructs (methods) as well
    )

    NagSuppressions.addResourceSuppressions(lambdaAuthorizerRole, [
      {
        id: 'AwsSolutions-IAM5',
        reason: 'Lambda Authorizer permissions to log to CloudWatch',
        appliesTo: [`Resource::arn:aws:logs:${this.region}:${this.account}:*`]
      }
    ])

    new cdk.CfnOutput(this, "Rest API Output : ", {
      value: this.restApi.url,
    });
  }
}
// sagemaker-studio-user-stack.ts
//
// Provisions a SageMaker Studio domain (unless an existing domainId is passed
// in) plus three Studio user profiles — mlflow-admin, mlflow-reader and
// mlflow-model-approver — whose execution roles carry progressively narrower
// execute-api permissions against the MLflow REST API.
import * as cdk from 'aws-cdk-lib';
import { Construct } from 'constructs';

import * as sagemaker from 'aws-cdk-lib/aws-sagemaker';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import * as apigateway from 'aws-cdk-lib/aws-apigateway';
import * as iam from "aws-cdk-lib/aws-iam";
import * as s3 from 'aws-cdk-lib/aws-s3';
import * as ssm from 'aws-cdk-lib/aws-ssm';

import { NagSuppressions } from 'cdk-nag'

// Per-region AWS account IDs that own the "sagemaker-base-python-38" image
// (used to build the image ARN below).
// NOTE(review): regions not listed here (e.g. ap-northeast-3, me-south-1)
// yield `undefined` in the ARN at synth time — consider failing fast with a
// clear error for unsupported regions.
const sageMakerImageArnMapping = {
  'us-east-1': "081325390199",
  'us-east-2': "429704687514",
  'us-west-1': "742091327244",
  'us-west-2': "236514542706",
  'af-south-1': "559312083959",
  'ap-east-1': "493642496378",
  'ap-south-1': "394103062818",
  'ap-northeast-2': "806072073708",
  'ap-southeast-1': "492261229750",
  'ap-southeast-2': "452832661640",
  'ap-northeast-1': "102112518831",
  'ca-central-1': "310906938811",
  'eu-central-1': "936697816551",
  'eu-west-1': "470317259841",
  'eu-west-2': "712779665605",
  'eu-west-3': "615547856133",
  'eu-north-1': "243637512696",
  'eu-south-1': "592751261982",
  'sa-east-1': "782484402741",
}

export class SageMakerStudioUserStack extends cdk.Stack {
  // Domain id of the Studio domain used (created here or passed in).
  public readonly sagemakerStudioDomainId: string;

  // Bucket used by the labs for SageMaker model deployment artifacts.
  readonly mlflowDeployBucketName = `mlflow-sagemaker-${this.region}-${this.account}`

  constructor(
    scope: Construct,
    id: string,
    httpGatewayStackName: string, // NOTE(review): appears unused in this constructor — confirm and remove if dead
    restApiGateway: apigateway.RestApi,
    domainId: string, // pass "" to create a fresh Studio domain in the default VPC
    accessLogs: s3.Bucket,
    props?: cdk.StackProps
  ){
    super(scope, id, props);

    // mlflow deployment S3 bucket
    const mlFlowDeployBucket = new s3.Bucket(this, "mlFlowDeployBucket", {
      versioned: false,
      bucketName: this.mlflowDeployBucketName,
      publicReadAccess: false,
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      removalPolicy: cdk.RemovalPolicy.DESTROY, // sample stack: bucket goes away with the stack
      autoDeleteObjects: true,
      encryption: s3.BucketEncryption.KMS_MANAGED,
      enforceSSL: true,
      serverAccessLogsBucket: accessLogs,
      serverAccessLogsPrefix: 'mlflow-deploy'
    })

    // NOTE(review): construct id 'mlflowRestApiId' looks like a copy-paste
    // from the REST API stack — this parameter actually stores the deploy
    // bucket name. Renaming the id would replace the CloudFormation resource
    // (and can collide on the shared SSM parameter name), so it is flagged
    // here rather than changed.
    const mlflowDeployBucketParam = new ssm.StringParameter(this, 'mlflowRestApiId', {
      parameterName: 'mlflow-deploy-bucket',
      stringValue: this.mlflowDeployBucketName,
    });

    // Policy to access the parameters from the Notebook for lab setup
    const ssmPolicy = new iam.PolicyDocument({
      statements: [
        new iam.PolicyStatement({
          effect: iam.Effect.ALLOW,
          resources: [
            `arn:aws:ssm:${this.region}:${this.account}:parameter/mlflow-restApiId`,
            `arn:aws:ssm:${this.region}:${this.account}:parameter/mlflow-restApiUrl`,
            `arn:aws:ssm:${this.region}:${this.account}:parameter/mlflow-deploy-bucket`,
            `arn:aws:ssm:${this.region}:${this.account}:parameter/mlflow-uiUrl`
          ],
          actions: [
            "ssm:GetParameters",
            "ssm:GetParameter",
          ]
        })
      ]
    })

    // Policy to have admin access to MLflow: invoke on any method/path.
    const restApiAdminPolicy = new iam.PolicyDocument({
      statements: [
        new iam.PolicyStatement({
          effect: iam.Effect.ALLOW,
          resources: [
            `arn:aws:execute-api:${this.region}:${this.account}:${restApiGateway.restApiId}/*/*/*`
          ],
          actions: ["execute-api:Invoke"],
        })
      ],
    })

    // Read/write access to the deploy bucket and the MLflow artifact bucket
    // (the artifact bucket name is assumed to follow the VPC stack's
    // `mlflow-<account>-<region>` convention — confirm there).
    const s3bucketPolicy = new iam.PolicyDocument({
      statements: [
        new iam.PolicyStatement({
          effect: iam.Effect.ALLOW,
          resources: [
            `arn:aws:s3:::${this.mlflowDeployBucketName}`,
            `arn:aws:s3:::mlflow-${this.account}-${this.region}`,
            `arn:aws:s3:::${this.mlflowDeployBucketName}/*`,
            `arn:aws:s3:::mlflow-${this.account}-${this.region}/*`
          ],
          actions: ["s3:ListBucket","s3:GetObject", "s3:PutObject", "s3:DeleteObject", "s3:PutObjectTagging"],
        })
      ],
    })

    // Policy to have read-only access to MLflow: all GETs plus the two POST
    // search endpoints (MLflow uses POST for runs/experiments search).
    const restApiReaderPolicy = new iam.PolicyDocument({
      statements: [
        new iam.PolicyStatement({
          effect: iam.Effect.ALLOW,
          resources: [
            `arn:aws:execute-api:${this.region}:${this.account}:${restApiGateway.restApiId}/*/GET/*`,
            `arn:aws:execute-api:${this.region}:${this.account}:${restApiGateway.restApiId}/*/POST/api/2.0/mlflow/runs/search`,
            `arn:aws:execute-api:${this.region}:${this.account}:${restApiGateway.restApiId}/*/POST/api/2.0/mlflow/experiments/search`
          ],
          actions: ["execute-api:Invoke"],
        })
      ],
    })

    // Model approver: reader permissions plus POSTs on model-versions and
    // registered-models (enough to transition model stages).
    const restApiModelApprover = new iam.PolicyDocument({
      statements: [
        new iam.PolicyStatement({
          effect: iam.Effect.ALLOW,
          resources: [
            `arn:aws:execute-api:${this.region}:${this.account}:${restApiGateway.restApiId}/*/GET/*`,
            `arn:aws:execute-api:${this.region}:${this.account}:${restApiGateway.restApiId}/*/POST/api/2.0/mlflow/runs/search`,
            `arn:aws:execute-api:${this.region}:${this.account}:${restApiGateway.restApiId}/*/POST/api/2.0/mlflow/experiments/search`,
            `arn:aws:execute-api:${this.region}:${this.account}:${restApiGateway.restApiId}/*/POST/api/2.0/mlflow/model-versions/*`,
            `arn:aws:execute-api:${this.region}:${this.account}:${restApiGateway.restApiId}/*/POST/api/2.0/mlflow/registered-models/*`,
          ],
          actions: ["execute-api:Invoke"],
        })
      ],
    })

    // SageMaker Execution Role for admins
    const sagemakerAdminExecutionRole = new iam.Role(this, "sagemaker-mlflow-admin-role", {
      assumedBy: new iam.CompositePrincipal(
        new iam.ServicePrincipal("sagemaker.amazonaws.com")
      ),
      managedPolicies: [
        iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonSageMakerFullAccess")
      ],
      inlinePolicies: {
        restApiAdmin: restApiAdminPolicy,
        s3Buckets: s3bucketPolicy,
        ssmPolicy: ssmPolicy
      },
    });

    // SageMaker Execution Role for readers
    const sagemakerReadersExecutionRole = new iam.Role(this, "sagemaker-mlflow-reader-role", {
      assumedBy: new iam.ServicePrincipal("sagemaker.amazonaws.com"),
      managedPolicies: [
        iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonSageMakerFullAccess")
      ],
      inlinePolicies: {
        restApiReader: restApiReaderPolicy,
        s3Buckets: s3bucketPolicy,
        ssmPolicy: ssmPolicy
      },
    });

    // SageMaker Execution Role for model approvers.
    // NOTE(review): "Aprover" is a typo for "Approver" in the local name and
    // construct id; the id feeds the IAM role's logical id, so renaming it
    // would replace the role — flagged, not changed.
    const sagemakerModelAproverExecutionRole = new iam.Role(this, "sagemaker-mlflow-model-aprover-role", {
      assumedBy: new iam.ServicePrincipal("sagemaker.amazonaws.com"),
      managedPolicies: [
        iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonSageMakerFullAccess")
      ],
      inlinePolicies: {
        restApiReader: restApiModelApprover,
        s3Buckets: s3bucketPolicy,
        ssmPolicy: ssmPolicy
      },
    });

    if (domainId == "") {
      // Create a domain in the account's default VPC.
      // NOTE(review): the domain is attached to the default VPC's *public*
      // subnets — acceptable for a sample, but private subnets are the usual
      // recommendation for Studio domains.
      const defaultVpc = ec2.Vpc.fromLookup(this, 'DefaultVPC', { isDefault: true });
      const subnetIds: string[] = [];

      defaultVpc.publicSubnets.forEach((subnet, index) => {
        subnetIds.push(subnet.subnetId);
      });

      const cfnStudioDomain = new sagemaker.CfnDomain(this, 'MyStudioDomain', {
        authMode: 'IAM',
        defaultUserSettings: {
          executionRole: sagemakerAdminExecutionRole.roleArn
        },
        domainName: 'StudioDomainName',
        vpcId: defaultVpc.vpcId,
        subnetIds: subnetIds,
      });

      this.sagemakerStudioDomainId = cfnStudioDomain.attrDomainId
    }
    else {
      // Reuse an existing Studio domain supplied by the caller.
      this.sagemakerStudioDomainId = domainId
    }

    // One user profile per persona, each bound to its execution role.
    const cfnAdminProfile = new sagemaker.CfnUserProfile(this, 'MyCfnAdminProfile', {
      domainId: this.sagemakerStudioDomainId,
      userProfileName: 'mlflow-admin',
      userSettings: {
        executionRole: sagemakerAdminExecutionRole.roleArn,
      }
    }
    );

    const cfnReaderProfile = new sagemaker.CfnUserProfile(this, 'MyCfnReaderProfile', {
      domainId: this.sagemakerStudioDomainId,
      userProfileName: 'mlflow-reader',
      userSettings: {
        executionRole: sagemakerReadersExecutionRole.roleArn,
      }
    }
    );

    const cfnModelApproverProfile = new sagemaker.CfnUserProfile(this, 'MyCfnModelApproverProfile', {
      domainId: this.sagemakerStudioDomainId,
      userProfileName: 'mlflow-model-approver',
      userSettings: {
        executionRole: sagemakerModelAproverExecutionRole.roleArn,
      }
    }
    );

    // Pre-warm a JupyterServer app and a KernelGateway app (ml.t3.medium,
    // base-python-38 image) for each profile so the labs start quickly.
    const cfnAdminJupyterApp = new sagemaker.CfnApp(this, 'MyCfnAdminJupyterApp', {
      appName: 'default',
      appType: 'JupyterServer',
      domainId: this.sagemakerStudioDomainId,
      userProfileName: cfnAdminProfile.userProfileName
    })

    const cfnAdminKernelApp = new sagemaker.CfnApp(this, 'MyCfnAdminKernelApp', {
      appName: 'instance-mlflow-basepython-2-0-ml-t3-medium',
      appType: 'KernelGateway',
      domainId: this.sagemakerStudioDomainId,
      userProfileName: cfnAdminProfile.userProfileName,
      resourceSpec: {
        instanceType: 'ml.t3.medium',
        sageMakerImageArn: `arn:aws:sagemaker:${this.region}:${sageMakerImageArnMapping[this.region]}:image/sagemaker-base-python-38`,
      }
    })

    // Apps must wait for their profile to exist.
    cfnAdminJupyterApp.addDependency(cfnAdminProfile)
    cfnAdminKernelApp.addDependency(cfnAdminProfile)

    const cfnReaderJupyterApp = new sagemaker.CfnApp(this, 'MyCfnReaderJupyterApp', {
      appName: 'default',
      appType: 'JupyterServer',
      domainId: this.sagemakerStudioDomainId,
      userProfileName: cfnReaderProfile.userProfileName
    })

    const cfnReaderKernelApp = new sagemaker.CfnApp(this, 'MyCfnReaderKernelApp', {
      appName: 'instance-mlflow-basepython-2-0-ml-t3-medium',
      appType: 'KernelGateway',
      domainId: this.sagemakerStudioDomainId,
      userProfileName: cfnReaderProfile.userProfileName,
      resourceSpec: {
        instanceType: 'ml.t3.medium',
        sageMakerImageArn: `arn:aws:sagemaker:${this.region}:${sageMakerImageArnMapping[this.region]}:image/sagemaker-base-python-38`,
      }
    })

    cfnReaderJupyterApp.addDependency(cfnReaderProfile)
    cfnReaderKernelApp.addDependency(cfnReaderProfile)

    const cfnModelApproverJupyterApp = new sagemaker.CfnApp(this, 'MyCfnModelApproverJupyterApp', {
      appName: 'default',
      appType: 'JupyterServer',
      domainId: this.sagemakerStudioDomainId,
      userProfileName: cfnModelApproverProfile.userProfileName
    })

    const cfnModelApproverKernelApp = new sagemaker.CfnApp(this, 'MyCfnModelApproverKernelApp', {
      appName: 'instance-mlflow-basepython-2-0-ml-t3-medium',
      appType: 'KernelGateway',
      domainId: this.sagemakerStudioDomainId,
      userProfileName: cfnModelApproverProfile.userProfileName,
      resourceSpec: {
        instanceType: 'ml.t3.medium',
        sageMakerImageArn: `arn:aws:sagemaker:${this.region}:${sageMakerImageArnMapping[this.region]}:image/sagemaker-base-python-38`,
      }
    })
    cfnModelApproverJupyterApp.addDependency(cfnModelApproverProfile)
    cfnModelApproverKernelApp.addDependency(cfnModelApproverProfile)

    // Shared cdk-nag suppression list for all three execution roles.
    // (NOTE(review): "Suprression" typo in the local name — safe to rename.)
    const nagIamSuprressionSMExecutionRole = [
      {
        id: 'AwsSolutions-IAM4',
        reason: "Domain users require full access and the managed policy is likely better than '*'"
      },
      {
        id: 'AwsSolutions-IAM5',
        reason: 'Group exceptions for API Gateway invoke permissions necessary to demonstrate model approver permission on MLflow',
      },
    ]

    NagSuppressions.addResourceSuppressions(sagemakerAdminExecutionRole, nagIamSuprressionSMExecutionRole, true)

    NagSuppressions.addResourceSuppressions(sagemakerReadersExecutionRole, nagIamSuprressionSMExecutionRole, true)

    NagSuppressions.addResourceSuppressions(sagemakerModelAproverExecutionRole, nagIamSuprressionSMExecutionRole, true)
  }
}
32 | }, 33 | "node_modules/@aws-cdk/asset-kubectl-v20": { 34 | "version": "2.1.2", 35 | "resolved": "https://registry.npmjs.org/@aws-cdk/asset-kubectl-v20/-/asset-kubectl-v20-2.1.2.tgz", 36 | "integrity": "sha512-3M2tELJOxQv0apCIiuKQ4pAbncz9GuLwnKFqxifWfe77wuMxyTRPmxssYHs42ePqzap1LT6GDcPygGs+hHstLg==", 37 | "dev": true 38 | }, 39 | "node_modules/@aws-cdk/asset-node-proxy-agent-v6": { 40 | "version": "2.0.1", 41 | "resolved": "https://registry.npmjs.org/@aws-cdk/asset-node-proxy-agent-v6/-/asset-node-proxy-agent-v6-2.0.1.tgz", 42 | "integrity": "sha512-DDt4SLdLOwWCjGtltH4VCST7hpOI5DzieuhGZsBpZ+AgJdSI2GCjklCXm0GCTwJG/SolkL5dtQXyUKgg9luBDg==", 43 | "dev": true 44 | }, 45 | "node_modules/@aws-cdk/aws-amplify-alpha": { 46 | "version": "2.104.0-alpha.0", 47 | "resolved": "https://registry.npmjs.org/@aws-cdk/aws-amplify-alpha/-/aws-amplify-alpha-2.104.0-alpha.0.tgz", 48 | "integrity": "sha512-TbvNrTblVxqPLqsZDtXuj1NvEP7WoO39hY8E2Ie6oWnURCDPq1eKFxWAvwrHb6DHKYX17Ruc3abYZyWQ9wtwyw==", 49 | "dev": true, 50 | "engines": { 51 | "node": ">= 14.15.0" 52 | }, 53 | "peerDependencies": { 54 | "aws-cdk-lib": "^2.104.0", 55 | "constructs": "^10.0.0" 56 | } 57 | }, 58 | "node_modules/@aws-cdk/aws-cognito-identitypool-alpha": { 59 | "version": "2.104.0-alpha.0", 60 | "resolved": "https://registry.npmjs.org/@aws-cdk/aws-cognito-identitypool-alpha/-/aws-cognito-identitypool-alpha-2.104.0-alpha.0.tgz", 61 | "integrity": "sha512-a1SdwZE1vdzkWjmULD9kt16nUHgg6Iz2WtHD8XZYw8nW4IKweIX0IgvO+Ie8JvGOfKrdZjTBpcwWgNTkA1aG9Q==", 62 | "dev": true, 63 | "engines": { 64 | "node": ">= 14.15.0" 65 | }, 66 | "peerDependencies": { 67 | "aws-cdk-lib": "^2.104.0", 68 | "constructs": "^10.0.0" 69 | } 70 | }, 71 | "node_modules/@aws-cdk/aws-lambda-python-alpha": { 72 | "version": "2.104.0-alpha.0", 73 | "resolved": "https://registry.npmjs.org/@aws-cdk/aws-lambda-python-alpha/-/aws-lambda-python-alpha-2.104.0-alpha.0.tgz", 74 | "integrity": 
"sha512-/5/KYpRQsoVZnUH7bjQ63J5iDp2fpArrmoiyQwiIHOVNVMDLE8UEPbK2AqKw2mInC99XiwIZaHA5u1PBiXB5kw==", 75 | "dev": true, 76 | "engines": { 77 | "node": ">= 14.15.0" 78 | }, 79 | "peerDependencies": { 80 | "aws-cdk-lib": "^2.104.0", 81 | "constructs": "^10.0.0" 82 | } 83 | }, 84 | "node_modules/@types/node": { 85 | "version": "10.17.27", 86 | "resolved": "https://registry.npmjs.org/@types/node/-/node-10.17.27.tgz", 87 | "integrity": "sha512-J0oqm9ZfAXaPdwNXMMgAhylw5fhmXkToJd06vuDUSAgEDZ/n/69/69UmyBZbc+zT34UnShuDSBqvim3SPnozJg==", 88 | "dev": true 89 | }, 90 | "node_modules/aws-cdk": { 91 | "version": "2.104.0", 92 | "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.104.0.tgz", 93 | "integrity": "sha512-JuCafR5D1lnMKA88JUYhvRYeguozAWneC/n6kR1FUG+kXtXxpEqOxP91118dfJZYRw7FMIkHW8ewddvLwaCy5g==", 94 | "dev": true, 95 | "bin": { 96 | "cdk": "bin/cdk" 97 | }, 98 | "engines": { 99 | "node": ">= 14.15.0" 100 | }, 101 | "optionalDependencies": { 102 | "fsevents": "2.3.2" 103 | } 104 | }, 105 | "node_modules/aws-cdk-lib": { 106 | "version": "2.104.0", 107 | "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.104.0.tgz", 108 | "integrity": "sha512-gD5KD2j8C5ff7j4RTA5ajFDNnpw4EvVhWMgWWrOyIEa9OkndEXwzhvCvwbEpwmgSUvnmsmbiHTBaLg8KVP+yKA==", 109 | "bundleDependencies": [ 110 | "@balena/dockerignore", 111 | "case", 112 | "fs-extra", 113 | "ignore", 114 | "jsonschema", 115 | "minimatch", 116 | "punycode", 117 | "semver", 118 | "table", 119 | "yaml" 120 | ], 121 | "dev": true, 122 | "dependencies": { 123 | "@aws-cdk/asset-awscli-v1": "^2.2.200", 124 | "@aws-cdk/asset-kubectl-v20": "^2.1.2", 125 | "@aws-cdk/asset-node-proxy-agent-v6": "^2.0.1", 126 | "@balena/dockerignore": "^1.0.2", 127 | "case": "1.6.3", 128 | "fs-extra": "^11.1.1", 129 | "ignore": "^5.2.4", 130 | "jsonschema": "^1.4.1", 131 | "minimatch": "^3.1.2", 132 | "punycode": "^2.3.0", 133 | "semver": "^7.5.4", 134 | "table": "^6.8.1", 135 | "yaml": "1.10.2" 136 | }, 137 | "engines": { 138 | "node": ">= 
14.15.0" 139 | }, 140 | "peerDependencies": { 141 | "constructs": "^10.0.0" 142 | } 143 | }, 144 | "node_modules/aws-cdk-lib/node_modules/@balena/dockerignore": { 145 | "version": "1.0.2", 146 | "dev": true, 147 | "inBundle": true, 148 | "license": "Apache-2.0" 149 | }, 150 | "node_modules/aws-cdk-lib/node_modules/ajv": { 151 | "version": "8.12.0", 152 | "dev": true, 153 | "inBundle": true, 154 | "license": "MIT", 155 | "dependencies": { 156 | "fast-deep-equal": "^3.1.1", 157 | "json-schema-traverse": "^1.0.0", 158 | "require-from-string": "^2.0.2", 159 | "uri-js": "^4.2.2" 160 | }, 161 | "funding": { 162 | "type": "github", 163 | "url": "https://github.com/sponsors/epoberezkin" 164 | } 165 | }, 166 | "node_modules/aws-cdk-lib/node_modules/ansi-regex": { 167 | "version": "5.0.1", 168 | "dev": true, 169 | "inBundle": true, 170 | "license": "MIT", 171 | "engines": { 172 | "node": ">=8" 173 | } 174 | }, 175 | "node_modules/aws-cdk-lib/node_modules/ansi-styles": { 176 | "version": "4.3.0", 177 | "dev": true, 178 | "inBundle": true, 179 | "license": "MIT", 180 | "dependencies": { 181 | "color-convert": "^2.0.1" 182 | }, 183 | "engines": { 184 | "node": ">=8" 185 | }, 186 | "funding": { 187 | "url": "https://github.com/chalk/ansi-styles?sponsor=1" 188 | } 189 | }, 190 | "node_modules/aws-cdk-lib/node_modules/astral-regex": { 191 | "version": "2.0.0", 192 | "dev": true, 193 | "inBundle": true, 194 | "license": "MIT", 195 | "engines": { 196 | "node": ">=8" 197 | } 198 | }, 199 | "node_modules/aws-cdk-lib/node_modules/balanced-match": { 200 | "version": "1.0.2", 201 | "dev": true, 202 | "inBundle": true, 203 | "license": "MIT" 204 | }, 205 | "node_modules/aws-cdk-lib/node_modules/brace-expansion": { 206 | "version": "1.1.11", 207 | "dev": true, 208 | "inBundle": true, 209 | "license": "MIT", 210 | "dependencies": { 211 | "balanced-match": "^1.0.0", 212 | "concat-map": "0.0.1" 213 | } 214 | }, 215 | "node_modules/aws-cdk-lib/node_modules/case": { 216 | "version": "1.6.3", 
217 | "dev": true, 218 | "inBundle": true, 219 | "license": "(MIT OR GPL-3.0-or-later)", 220 | "engines": { 221 | "node": ">= 0.8.0" 222 | } 223 | }, 224 | "node_modules/aws-cdk-lib/node_modules/color-convert": { 225 | "version": "2.0.1", 226 | "dev": true, 227 | "inBundle": true, 228 | "license": "MIT", 229 | "dependencies": { 230 | "color-name": "~1.1.4" 231 | }, 232 | "engines": { 233 | "node": ">=7.0.0" 234 | } 235 | }, 236 | "node_modules/aws-cdk-lib/node_modules/color-name": { 237 | "version": "1.1.4", 238 | "dev": true, 239 | "inBundle": true, 240 | "license": "MIT" 241 | }, 242 | "node_modules/aws-cdk-lib/node_modules/concat-map": { 243 | "version": "0.0.1", 244 | "dev": true, 245 | "inBundle": true, 246 | "license": "MIT" 247 | }, 248 | "node_modules/aws-cdk-lib/node_modules/emoji-regex": { 249 | "version": "8.0.0", 250 | "dev": true, 251 | "inBundle": true, 252 | "license": "MIT" 253 | }, 254 | "node_modules/aws-cdk-lib/node_modules/fast-deep-equal": { 255 | "version": "3.1.3", 256 | "dev": true, 257 | "inBundle": true, 258 | "license": "MIT" 259 | }, 260 | "node_modules/aws-cdk-lib/node_modules/fs-extra": { 261 | "version": "11.1.1", 262 | "dev": true, 263 | "inBundle": true, 264 | "license": "MIT", 265 | "dependencies": { 266 | "graceful-fs": "^4.2.0", 267 | "jsonfile": "^6.0.1", 268 | "universalify": "^2.0.0" 269 | }, 270 | "engines": { 271 | "node": ">=14.14" 272 | } 273 | }, 274 | "node_modules/aws-cdk-lib/node_modules/graceful-fs": { 275 | "version": "4.2.11", 276 | "dev": true, 277 | "inBundle": true, 278 | "license": "ISC" 279 | }, 280 | "node_modules/aws-cdk-lib/node_modules/ignore": { 281 | "version": "5.2.4", 282 | "dev": true, 283 | "inBundle": true, 284 | "license": "MIT", 285 | "engines": { 286 | "node": ">= 4" 287 | } 288 | }, 289 | "node_modules/aws-cdk-lib/node_modules/is-fullwidth-code-point": { 290 | "version": "3.0.0", 291 | "dev": true, 292 | "inBundle": true, 293 | "license": "MIT", 294 | "engines": { 295 | "node": ">=8" 296 | } 297 
| }, 298 | "node_modules/aws-cdk-lib/node_modules/json-schema-traverse": { 299 | "version": "1.0.0", 300 | "dev": true, 301 | "inBundle": true, 302 | "license": "MIT" 303 | }, 304 | "node_modules/aws-cdk-lib/node_modules/jsonfile": { 305 | "version": "6.1.0", 306 | "dev": true, 307 | "inBundle": true, 308 | "license": "MIT", 309 | "dependencies": { 310 | "universalify": "^2.0.0" 311 | }, 312 | "optionalDependencies": { 313 | "graceful-fs": "^4.1.6" 314 | } 315 | }, 316 | "node_modules/aws-cdk-lib/node_modules/jsonschema": { 317 | "version": "1.4.1", 318 | "dev": true, 319 | "inBundle": true, 320 | "license": "MIT", 321 | "engines": { 322 | "node": "*" 323 | } 324 | }, 325 | "node_modules/aws-cdk-lib/node_modules/lodash.truncate": { 326 | "version": "4.4.2", 327 | "dev": true, 328 | "inBundle": true, 329 | "license": "MIT" 330 | }, 331 | "node_modules/aws-cdk-lib/node_modules/lru-cache": { 332 | "version": "6.0.0", 333 | "dev": true, 334 | "inBundle": true, 335 | "license": "ISC", 336 | "dependencies": { 337 | "yallist": "^4.0.0" 338 | }, 339 | "engines": { 340 | "node": ">=10" 341 | } 342 | }, 343 | "node_modules/aws-cdk-lib/node_modules/minimatch": { 344 | "version": "3.1.2", 345 | "dev": true, 346 | "inBundle": true, 347 | "license": "ISC", 348 | "dependencies": { 349 | "brace-expansion": "^1.1.7" 350 | }, 351 | "engines": { 352 | "node": "*" 353 | } 354 | }, 355 | "node_modules/aws-cdk-lib/node_modules/punycode": { 356 | "version": "2.3.0", 357 | "dev": true, 358 | "inBundle": true, 359 | "license": "MIT", 360 | "engines": { 361 | "node": ">=6" 362 | } 363 | }, 364 | "node_modules/aws-cdk-lib/node_modules/require-from-string": { 365 | "version": "2.0.2", 366 | "dev": true, 367 | "inBundle": true, 368 | "license": "MIT", 369 | "engines": { 370 | "node": ">=0.10.0" 371 | } 372 | }, 373 | "node_modules/aws-cdk-lib/node_modules/semver": { 374 | "version": "7.5.4", 375 | "dev": true, 376 | "inBundle": true, 377 | "license": "ISC", 378 | "dependencies": { 379 | 
"lru-cache": "^6.0.0" 380 | }, 381 | "bin": { 382 | "semver": "bin/semver.js" 383 | }, 384 | "engines": { 385 | "node": ">=10" 386 | } 387 | }, 388 | "node_modules/aws-cdk-lib/node_modules/slice-ansi": { 389 | "version": "4.0.0", 390 | "dev": true, 391 | "inBundle": true, 392 | "license": "MIT", 393 | "dependencies": { 394 | "ansi-styles": "^4.0.0", 395 | "astral-regex": "^2.0.0", 396 | "is-fullwidth-code-point": "^3.0.0" 397 | }, 398 | "engines": { 399 | "node": ">=10" 400 | }, 401 | "funding": { 402 | "url": "https://github.com/chalk/slice-ansi?sponsor=1" 403 | } 404 | }, 405 | "node_modules/aws-cdk-lib/node_modules/string-width": { 406 | "version": "4.2.3", 407 | "dev": true, 408 | "inBundle": true, 409 | "license": "MIT", 410 | "dependencies": { 411 | "emoji-regex": "^8.0.0", 412 | "is-fullwidth-code-point": "^3.0.0", 413 | "strip-ansi": "^6.0.1" 414 | }, 415 | "engines": { 416 | "node": ">=8" 417 | } 418 | }, 419 | "node_modules/aws-cdk-lib/node_modules/strip-ansi": { 420 | "version": "6.0.1", 421 | "dev": true, 422 | "inBundle": true, 423 | "license": "MIT", 424 | "dependencies": { 425 | "ansi-regex": "^5.0.1" 426 | }, 427 | "engines": { 428 | "node": ">=8" 429 | } 430 | }, 431 | "node_modules/aws-cdk-lib/node_modules/table": { 432 | "version": "6.8.1", 433 | "dev": true, 434 | "inBundle": true, 435 | "license": "BSD-3-Clause", 436 | "dependencies": { 437 | "ajv": "^8.0.1", 438 | "lodash.truncate": "^4.4.2", 439 | "slice-ansi": "^4.0.0", 440 | "string-width": "^4.2.3", 441 | "strip-ansi": "^6.0.1" 442 | }, 443 | "engines": { 444 | "node": ">=10.0.0" 445 | } 446 | }, 447 | "node_modules/aws-cdk-lib/node_modules/universalify": { 448 | "version": "2.0.0", 449 | "dev": true, 450 | "inBundle": true, 451 | "license": "MIT", 452 | "engines": { 453 | "node": ">= 10.0.0" 454 | } 455 | }, 456 | "node_modules/aws-cdk-lib/node_modules/uri-js": { 457 | "version": "4.4.1", 458 | "dev": true, 459 | "inBundle": true, 460 | "license": "BSD-2-Clause", 461 | "dependencies": { 
462 | "punycode": "^2.1.0" 463 | } 464 | }, 465 | "node_modules/aws-cdk-lib/node_modules/yallist": { 466 | "version": "4.0.0", 467 | "dev": true, 468 | "inBundle": true, 469 | "license": "ISC" 470 | }, 471 | "node_modules/aws-cdk-lib/node_modules/yaml": { 472 | "version": "1.10.2", 473 | "dev": true, 474 | "inBundle": true, 475 | "license": "ISC", 476 | "engines": { 477 | "node": ">= 6" 478 | } 479 | }, 480 | "node_modules/buffer-from": { 481 | "version": "1.1.1", 482 | "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", 483 | "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==" 484 | }, 485 | "node_modules/cdk-nag": { 486 | "version": "2.25.5", 487 | "resolved": "https://registry.npmjs.org/cdk-nag/-/cdk-nag-2.25.5.tgz", 488 | "integrity": "sha512-LTxiNd1BP5NM0ztJ7wAD3r5SNWVkcjbT09OtccFAAZ6xRU3RRVkGxNKqS1rFmdyleNGXpw4Dwe1wkgsc61zppw==", 489 | "dev": true, 490 | "peerDependencies": { 491 | "aws-cdk-lib": "^2.45.0", 492 | "constructs": "^10.0.5" 493 | } 494 | }, 495 | "node_modules/constructs": { 496 | "version": "10.0.37", 497 | "resolved": "https://registry.npmjs.org/constructs/-/constructs-10.0.37.tgz", 498 | "integrity": "sha512-wXtJtGpYzV8R+krlzeFpWqyndJ7zX7OLajzRTYW3bOE8bvfubiUdLbnTCienfcLz1fpvCnyTIO1b98CLBawKQw==", 499 | "dev": true, 500 | "peer": true, 501 | "engines": { 502 | "node": ">= 12.7.0" 503 | } 504 | }, 505 | "node_modules/fsevents": { 506 | "version": "2.3.2", 507 | "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", 508 | "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", 509 | "dev": true, 510 | "hasInstallScript": true, 511 | "optional": true, 512 | "os": [ 513 | "darwin" 514 | ], 515 | "engines": { 516 | "node": "^8.16.0 || ^10.6.0 || >=11.0.0" 517 | } 518 | }, 519 | "node_modules/source-map": { 520 | "version": "0.6.1", 521 | "resolved": 
"https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", 522 | "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", 523 | "engines": { 524 | "node": ">=0.10.0" 525 | } 526 | }, 527 | "node_modules/source-map-support": { 528 | "version": "0.5.19", 529 | "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.19.tgz", 530 | "integrity": "sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==", 531 | "dependencies": { 532 | "buffer-from": "^1.0.0", 533 | "source-map": "^0.6.0" 534 | } 535 | }, 536 | "node_modules/typescript": { 537 | "version": "3.9.7", 538 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.9.7.tgz", 539 | "integrity": "sha512-BLbiRkiBzAwsjut4x/dsibSTB6yWpwT5qWmC2OfuCg3GgVQCSgMs4vEctYPhsaGtd0AeuuHMkjZ2h2WG8MSzRw==", 540 | "dev": true, 541 | "bin": { 542 | "tsc": "bin/tsc", 543 | "tsserver": "bin/tsserver" 544 | }, 545 | "engines": { 546 | "node": ">=4.2.0" 547 | } 548 | } 549 | }, 550 | "dependencies": { 551 | "@aws-cdk/asset-awscli-v1": { 552 | "version": "2.2.201", 553 | "resolved": "https://registry.npmjs.org/@aws-cdk/asset-awscli-v1/-/asset-awscli-v1-2.2.201.tgz", 554 | "integrity": "sha512-INZqcwDinNaIdb5CtW3ez5s943nX5stGBQS6VOP2JDlOFP81hM3fds/9NDknipqfUkZM43dx+HgVvkXYXXARCQ==", 555 | "dev": true 556 | }, 557 | "@aws-cdk/asset-kubectl-v20": { 558 | "version": "2.1.2", 559 | "resolved": "https://registry.npmjs.org/@aws-cdk/asset-kubectl-v20/-/asset-kubectl-v20-2.1.2.tgz", 560 | "integrity": "sha512-3M2tELJOxQv0apCIiuKQ4pAbncz9GuLwnKFqxifWfe77wuMxyTRPmxssYHs42ePqzap1LT6GDcPygGs+hHstLg==", 561 | "dev": true 562 | }, 563 | "@aws-cdk/asset-node-proxy-agent-v6": { 564 | "version": "2.0.1", 565 | "resolved": "https://registry.npmjs.org/@aws-cdk/asset-node-proxy-agent-v6/-/asset-node-proxy-agent-v6-2.0.1.tgz", 566 | "integrity": 
"sha512-DDt4SLdLOwWCjGtltH4VCST7hpOI5DzieuhGZsBpZ+AgJdSI2GCjklCXm0GCTwJG/SolkL5dtQXyUKgg9luBDg==", 567 | "dev": true 568 | }, 569 | "@aws-cdk/aws-amplify-alpha": { 570 | "version": "2.104.0-alpha.0", 571 | "resolved": "https://registry.npmjs.org/@aws-cdk/aws-amplify-alpha/-/aws-amplify-alpha-2.104.0-alpha.0.tgz", 572 | "integrity": "sha512-TbvNrTblVxqPLqsZDtXuj1NvEP7WoO39hY8E2Ie6oWnURCDPq1eKFxWAvwrHb6DHKYX17Ruc3abYZyWQ9wtwyw==", 573 | "dev": true, 574 | "requires": {} 575 | }, 576 | "@aws-cdk/aws-cognito-identitypool-alpha": { 577 | "version": "2.104.0-alpha.0", 578 | "resolved": "https://registry.npmjs.org/@aws-cdk/aws-cognito-identitypool-alpha/-/aws-cognito-identitypool-alpha-2.104.0-alpha.0.tgz", 579 | "integrity": "sha512-a1SdwZE1vdzkWjmULD9kt16nUHgg6Iz2WtHD8XZYw8nW4IKweIX0IgvO+Ie8JvGOfKrdZjTBpcwWgNTkA1aG9Q==", 580 | "dev": true, 581 | "requires": {} 582 | }, 583 | "@aws-cdk/aws-lambda-python-alpha": { 584 | "version": "2.104.0-alpha.0", 585 | "resolved": "https://registry.npmjs.org/@aws-cdk/aws-lambda-python-alpha/-/aws-lambda-python-alpha-2.104.0-alpha.0.tgz", 586 | "integrity": "sha512-/5/KYpRQsoVZnUH7bjQ63J5iDp2fpArrmoiyQwiIHOVNVMDLE8UEPbK2AqKw2mInC99XiwIZaHA5u1PBiXB5kw==", 587 | "dev": true, 588 | "requires": {} 589 | }, 590 | "@types/node": { 591 | "version": "10.17.27", 592 | "resolved": "https://registry.npmjs.org/@types/node/-/node-10.17.27.tgz", 593 | "integrity": "sha512-J0oqm9ZfAXaPdwNXMMgAhylw5fhmXkToJd06vuDUSAgEDZ/n/69/69UmyBZbc+zT34UnShuDSBqvim3SPnozJg==", 594 | "dev": true 595 | }, 596 | "aws-cdk": { 597 | "version": "2.104.0", 598 | "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.104.0.tgz", 599 | "integrity": "sha512-JuCafR5D1lnMKA88JUYhvRYeguozAWneC/n6kR1FUG+kXtXxpEqOxP91118dfJZYRw7FMIkHW8ewddvLwaCy5g==", 600 | "dev": true, 601 | "requires": { 602 | "fsevents": "2.3.2" 603 | } 604 | }, 605 | "aws-cdk-lib": { 606 | "version": "2.104.0", 607 | "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.104.0.tgz", 608 | 
"integrity": "sha512-gD5KD2j8C5ff7j4RTA5ajFDNnpw4EvVhWMgWWrOyIEa9OkndEXwzhvCvwbEpwmgSUvnmsmbiHTBaLg8KVP+yKA==", 609 | "dev": true, 610 | "requires": { 611 | "@aws-cdk/asset-awscli-v1": "^2.2.200", 612 | "@aws-cdk/asset-kubectl-v20": "^2.1.2", 613 | "@aws-cdk/asset-node-proxy-agent-v6": "^2.0.1", 614 | "@balena/dockerignore": "^1.0.2", 615 | "case": "1.6.3", 616 | "fs-extra": "^11.1.1", 617 | "ignore": "^5.2.4", 618 | "jsonschema": "^1.4.1", 619 | "minimatch": "^3.1.2", 620 | "punycode": "^2.3.0", 621 | "semver": "^7.5.4", 622 | "table": "^6.8.1", 623 | "yaml": "1.10.2" 624 | }, 625 | "dependencies": { 626 | "@balena/dockerignore": { 627 | "version": "1.0.2", 628 | "bundled": true, 629 | "dev": true 630 | }, 631 | "ajv": { 632 | "version": "8.12.0", 633 | "bundled": true, 634 | "dev": true, 635 | "requires": { 636 | "fast-deep-equal": "^3.1.1", 637 | "json-schema-traverse": "^1.0.0", 638 | "require-from-string": "^2.0.2", 639 | "uri-js": "^4.2.2" 640 | } 641 | }, 642 | "ansi-regex": { 643 | "version": "5.0.1", 644 | "bundled": true, 645 | "dev": true 646 | }, 647 | "ansi-styles": { 648 | "version": "4.3.0", 649 | "bundled": true, 650 | "dev": true, 651 | "requires": { 652 | "color-convert": "^2.0.1" 653 | } 654 | }, 655 | "astral-regex": { 656 | "version": "2.0.0", 657 | "bundled": true, 658 | "dev": true 659 | }, 660 | "balanced-match": { 661 | "version": "1.0.2", 662 | "bundled": true, 663 | "dev": true 664 | }, 665 | "brace-expansion": { 666 | "version": "1.1.11", 667 | "bundled": true, 668 | "dev": true, 669 | "requires": { 670 | "balanced-match": "^1.0.0", 671 | "concat-map": "0.0.1" 672 | } 673 | }, 674 | "case": { 675 | "version": "1.6.3", 676 | "bundled": true, 677 | "dev": true 678 | }, 679 | "color-convert": { 680 | "version": "2.0.1", 681 | "bundled": true, 682 | "dev": true, 683 | "requires": { 684 | "color-name": "~1.1.4" 685 | } 686 | }, 687 | "color-name": { 688 | "version": "1.1.4", 689 | "bundled": true, 690 | "dev": true 691 | }, 692 | 
"concat-map": { 693 | "version": "0.0.1", 694 | "bundled": true, 695 | "dev": true 696 | }, 697 | "emoji-regex": { 698 | "version": "8.0.0", 699 | "bundled": true, 700 | "dev": true 701 | }, 702 | "fast-deep-equal": { 703 | "version": "3.1.3", 704 | "bundled": true, 705 | "dev": true 706 | }, 707 | "fs-extra": { 708 | "version": "11.1.1", 709 | "bundled": true, 710 | "dev": true, 711 | "requires": { 712 | "graceful-fs": "^4.2.0", 713 | "jsonfile": "^6.0.1", 714 | "universalify": "^2.0.0" 715 | } 716 | }, 717 | "graceful-fs": { 718 | "version": "4.2.11", 719 | "bundled": true, 720 | "dev": true 721 | }, 722 | "ignore": { 723 | "version": "5.2.4", 724 | "bundled": true, 725 | "dev": true 726 | }, 727 | "is-fullwidth-code-point": { 728 | "version": "3.0.0", 729 | "bundled": true, 730 | "dev": true 731 | }, 732 | "json-schema-traverse": { 733 | "version": "1.0.0", 734 | "bundled": true, 735 | "dev": true 736 | }, 737 | "jsonfile": { 738 | "version": "6.1.0", 739 | "bundled": true, 740 | "dev": true, 741 | "requires": { 742 | "graceful-fs": "^4.1.6", 743 | "universalify": "^2.0.0" 744 | } 745 | }, 746 | "jsonschema": { 747 | "version": "1.4.1", 748 | "bundled": true, 749 | "dev": true 750 | }, 751 | "lodash.truncate": { 752 | "version": "4.4.2", 753 | "bundled": true, 754 | "dev": true 755 | }, 756 | "lru-cache": { 757 | "version": "6.0.0", 758 | "bundled": true, 759 | "dev": true, 760 | "requires": { 761 | "yallist": "^4.0.0" 762 | } 763 | }, 764 | "minimatch": { 765 | "version": "3.1.2", 766 | "bundled": true, 767 | "dev": true, 768 | "requires": { 769 | "brace-expansion": "^1.1.7" 770 | } 771 | }, 772 | "punycode": { 773 | "version": "2.3.0", 774 | "bundled": true, 775 | "dev": true 776 | }, 777 | "require-from-string": { 778 | "version": "2.0.2", 779 | "bundled": true, 780 | "dev": true 781 | }, 782 | "semver": { 783 | "version": "7.5.4", 784 | "bundled": true, 785 | "dev": true, 786 | "requires": { 787 | "lru-cache": "^6.0.0" 788 | } 789 | }, 790 | "slice-ansi": { 
791 | "version": "4.0.0", 792 | "bundled": true, 793 | "dev": true, 794 | "requires": { 795 | "ansi-styles": "^4.0.0", 796 | "astral-regex": "^2.0.0", 797 | "is-fullwidth-code-point": "^3.0.0" 798 | } 799 | }, 800 | "string-width": { 801 | "version": "4.2.3", 802 | "bundled": true, 803 | "dev": true, 804 | "requires": { 805 | "emoji-regex": "^8.0.0", 806 | "is-fullwidth-code-point": "^3.0.0", 807 | "strip-ansi": "^6.0.1" 808 | } 809 | }, 810 | "strip-ansi": { 811 | "version": "6.0.1", 812 | "bundled": true, 813 | "dev": true, 814 | "requires": { 815 | "ansi-regex": "^5.0.1" 816 | } 817 | }, 818 | "table": { 819 | "version": "6.8.1", 820 | "bundled": true, 821 | "dev": true, 822 | "requires": { 823 | "ajv": "^8.0.1", 824 | "lodash.truncate": "^4.4.2", 825 | "slice-ansi": "^4.0.0", 826 | "string-width": "^4.2.3", 827 | "strip-ansi": "^6.0.1" 828 | } 829 | }, 830 | "universalify": { 831 | "version": "2.0.0", 832 | "bundled": true, 833 | "dev": true 834 | }, 835 | "uri-js": { 836 | "version": "4.4.1", 837 | "bundled": true, 838 | "dev": true, 839 | "requires": { 840 | "punycode": "^2.1.0" 841 | } 842 | }, 843 | "yallist": { 844 | "version": "4.0.0", 845 | "bundled": true, 846 | "dev": true 847 | }, 848 | "yaml": { 849 | "version": "1.10.2", 850 | "bundled": true, 851 | "dev": true 852 | } 853 | } 854 | }, 855 | "buffer-from": { 856 | "version": "1.1.1", 857 | "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", 858 | "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==" 859 | }, 860 | "cdk-nag": { 861 | "version": "2.25.5", 862 | "resolved": "https://registry.npmjs.org/cdk-nag/-/cdk-nag-2.25.5.tgz", 863 | "integrity": "sha512-LTxiNd1BP5NM0ztJ7wAD3r5SNWVkcjbT09OtccFAAZ6xRU3RRVkGxNKqS1rFmdyleNGXpw4Dwe1wkgsc61zppw==", 864 | "dev": true, 865 | "requires": {} 866 | }, 867 | "constructs": { 868 | "version": "10.0.37", 869 | "resolved": 
"https://registry.npmjs.org/constructs/-/constructs-10.0.37.tgz", 870 | "integrity": "sha512-wXtJtGpYzV8R+krlzeFpWqyndJ7zX7OLajzRTYW3bOE8bvfubiUdLbnTCienfcLz1fpvCnyTIO1b98CLBawKQw==", 871 | "dev": true, 872 | "peer": true 873 | }, 874 | "fsevents": { 875 | "version": "2.3.2", 876 | "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", 877 | "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", 878 | "dev": true, 879 | "optional": true 880 | }, 881 | "source-map": { 882 | "version": "0.6.1", 883 | "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", 884 | "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==" 885 | }, 886 | "source-map-support": { 887 | "version": "0.5.19", 888 | "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.19.tgz", 889 | "integrity": "sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==", 890 | "requires": { 891 | "buffer-from": "^1.0.0", 892 | "source-map": "^0.6.0" 893 | } 894 | }, 895 | "typescript": { 896 | "version": "3.9.7", 897 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.9.7.tgz", 898 | "integrity": "sha512-BLbiRkiBzAwsjut4x/dsibSTB6yWpwT5qWmC2OfuCg3GgVQCSgMs4vEctYPhsaGtd0AeuuHMkjZ2h2WG8MSzRw==", 899 | "dev": true 900 | } 901 | } 902 | } 903 | -------------------------------------------------------------------------------- /cdk/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cdk", 3 | "version": "2.12.2", 4 | "bin": { 5 | "cdk": "bin/cdk.js" 6 | }, 7 | "scripts": { 8 | "cdk": "cdk" 9 | }, 10 | "devDependencies": { 11 | "aws-cdk-lib": "2.104.0", 12 | "@aws-cdk/aws-amplify-alpha": "2.104.0-alpha.0", 13 | "@aws-cdk/aws-cognito-identitypool-alpha": "2.104.0-alpha.0", 14 | "@aws-cdk/aws-lambda-python-alpha": 
"2.104.0-alpha.0", 15 | "@types/node": "10.17.27", 16 | "cdk-nag": "2.25.5", 17 | "aws-cdk": "2.104.0", 18 | "typescript": "~3.9.7" 19 | }, 20 | "dependencies": { 21 | "source-map-support": "^0.5.16" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /cdk/resize-cloud9.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify the desired volume size in GiB as a command line argument. If not specified, default to 20 GiB. 4 | SIZE=${1:-20} 5 | 6 | # Get the ID of the environment host Amazon EC2 instance. 7 | INSTANCEID=$(curl http://169.254.169.254/latest/meta-data/instance-id) 8 | REGION=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone | sed 's/\(.*\)[a-z]/\1/') 9 | 10 | # Get the ID of the Amazon EBS volume associated with the instance. 11 | VOLUMEID=$(aws ec2 describe-instances \ 12 | --instance-id $INSTANCEID \ 13 | --query "Reservations[0].Instances[0].BlockDeviceMappings[0].Ebs.VolumeId" \ 14 | --output text \ 15 | --region $REGION) 16 | 17 | # Resize the EBS volume. 18 | aws ec2 modify-volume --volume-id $VOLUMEID --size $SIZE 19 | 20 | # Wait for the resize to finish. 21 | while [ \ 22 | "$(aws ec2 describe-volumes-modifications \ 23 | --volume-id $VOLUMEID \ 24 | --filters Name=modification-state,Values="optimizing","completed" \ 25 | --query "length(VolumesModifications)"\ 26 | --output text)" != "1" ]; do 27 | sleep 1 28 | done 29 | 30 | #Check if we're on an NVMe filesystem 31 | if [[ -e "/dev/xvda" && $(readlink -f /dev/xvda) = "/dev/xvda" ]] 32 | then 33 | # Rewrite the partition table so that the partition takes up all the space that it can. 34 | sudo growpart /dev/xvda 1 35 | 36 | # Expand the size of the file system. 
37 | # Check if we're on AL2 38 | STR=$(cat /etc/os-release) 39 | SUB="VERSION_ID=\"2\"" 40 | if [[ "$STR" == *"$SUB"* ]] 41 | then 42 | sudo xfs_growfs -d / 43 | else 44 | sudo resize2fs /dev/xvda1 45 | fi 46 | 47 | else 48 | # Rewrite the partition table so that the partition takes up all the space that it can. 49 | sudo growpart /dev/nvme0n1 1 50 | 51 | # Expand the size of the file system. 52 | # Check if we're on AL2 53 | STR=$(cat /etc/os-release) 54 | SUB="VERSION_ID=\"2\"" 55 | if [[ "$STR" == *"$SUB"* ]] 56 | then 57 | sudo xfs_growfs -d / 58 | else 59 | sudo resize2fs /dev/nvme0n1p1 60 | fi 61 | fi -------------------------------------------------------------------------------- /cdk/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "commonjs", 5 | "lib": ["es2018"], 6 | "declaration": true, 7 | "strict": true, 8 | "noImplicitAny": false, 9 | "strictNullChecks": true, 10 | "noImplicitThis": true, 11 | "alwaysStrict": true, 12 | "noUnusedLocals": false, 13 | "noUnusedParameters": false, 14 | "noImplicitReturns": true, 15 | "noFallthroughCasesInSwitch": false, 16 | "inlineSourceMap": true, 17 | "inlineSources": true, 18 | "experimentalDecorators": true, 19 | "strictPropertyInitialization": false, 20 | "typeRoots": ["./node_modules/@types"] 21 | }, 22 | "exclude": ["cdk.out"] 23 | } 24 | -------------------------------------------------------------------------------- /images/amplify-main-branch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/amplify-main-branch.png -------------------------------------------------------------------------------- /images/amplify-mlflow-ui-link.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/amplify-mlflow-ui-link.png -------------------------------------------------------------------------------- /images/amplify-redeploy-this-version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/amplify-redeploy-this-version.png -------------------------------------------------------------------------------- /images/amplify-run-first-build.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/amplify-run-first-build.png -------------------------------------------------------------------------------- /images/cognito-user-pool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/cognito-user-pool.png -------------------------------------------------------------------------------- /images/enable-models.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/enable-models.gif -------------------------------------------------------------------------------- /images/jupyterlab-iframe-search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/jupyterlab-iframe-search.png 
-------------------------------------------------------------------------------- /images/mlflow-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-architecture.png -------------------------------------------------------------------------------- /images/mlflow-cognito.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-cognito.png -------------------------------------------------------------------------------- /images/mlflow-gateway-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-gateway-architecture.png -------------------------------------------------------------------------------- /images/mlflow-gateway-cognito.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-gateway-cognito.png -------------------------------------------------------------------------------- /images/mlflow-gateway-sagemaker-cognito.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-gateway-sagemaker-cognito.png -------------------------------------------------------------------------------- /images/mlflow-gateway-sagemaker.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-gateway-sagemaker.png -------------------------------------------------------------------------------- /images/mlflow-output-artifacts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-output-artifacts.png -------------------------------------------------------------------------------- /images/mlflow-sagemaker-cognito.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-sagemaker-cognito.png -------------------------------------------------------------------------------- /images/mlflow-sagemaker-multi-account.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-sagemaker-multi-account.png -------------------------------------------------------------------------------- /images/mlflow-sagemaker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/mlflow-sagemaker.png -------------------------------------------------------------------------------- /images/sagemaker-studio-domain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/sagemaker-studio-domain.png 
-------------------------------------------------------------------------------- /images/sm-mlflow-admin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/sm-mlflow-admin.png -------------------------------------------------------------------------------- /images/studio-extension-manager.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/studio-extension-manager.png -------------------------------------------------------------------------------- /images/studio-iframe-mlflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-studio-mlflow-integration/3b847a4b70243c2ac0dee136655b981fef737a3a/images/studio-iframe-mlflow.png -------------------------------------------------------------------------------- /lab/4_mlflow-gateway.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Train a Scikit-Learn model in SageMaker and track with MLFlow" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Intro\n", 15 | "\n", 16 | "The main objective of this notebook is to show how you can securely interact with a MLflow server using Amazon SageMaker Studio and SageMaker Trainings.\n", 17 | "This notebook is meant to be used with the SageMaker Studio user profile `mlflow-admin` and the MLflow server created by [this CDK deployment](https://github.com/aws-samples/sagemaker-studio-mlflow-integration.git) since it ensures the user has the right permissions in place to execute the lab.\n", 18 | "We 
will train a model in SageMaker, but use MLflow to track the experiments, register the model, and we will then deploy to the SageMaker managed infrastructure the resulting model.\n", 19 | "\n", 20 | "## Pre-Requisites\n", 21 | "\n", 22 | "* Successfully deployed the CDK sample in [this repository](https://github.com/aws-samples/sagemaker-studio-mlflow-integration.git).\n", 23 | "* Access to the `mlflow-admin` user profile in the created SageMaker Studio domain and use the `Base Python 2.0` image on a `Python 3` kernel.\n", 24 | "\n", 25 | "## The Machine Learning Problem\n", 26 | "\n", 27 | "In this example, we will solve a regression problem which aims to answer the question: \"what is the expected price of a house in the California area?\".\n", 28 | "The target variable is the house value for California districts, expressed in hundreds of thousands of dollars ($100,000).\n", 29 | "\n", 30 | "## Install required and/or update libraries\n", 31 | "\n", 32 | "At the time of writing, we have used the `sagemaker` SDK version 2. The MLFlow SDK library used is the one corresponding to our MLflow server version, i.e., `2.12.2`.\n", 33 | "We install the `mlflow[gateway]==2.12.2` to ensure that all required dependencies are installed." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "tags": [] 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "%pip install -q --upgrade pip setuptools wheel\n", 45 | "%pip install -q sagemaker \n", 46 | "%pip install -q requests_auth_aws_sigv4 boto3 mlflow[genai]==2.12.2\n", 47 | "%pip install -q langchain==0.0.354" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "Let's start by specifying:\n", 55 | "\n", 56 | "- The S3 bucket and prefix that you want to use for training and model data. 
This should be within the same region as the notebook instance, training, and hosting.\n", 57 | "- The IAM role arn associated with the user profile (`sagemaker.get_execution_role()`) which we will use to train in SageMaker, track the experiment in MLflow, register a model in MLflow, and host a MLflow model in SageMaker. See the [documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/using-identity-based-policies.html) for more details on creating these. The sagemaker execution role associated with the user profile `mlflow-admin` will have the appropriate permissions to do all these operations.\n", 58 | "- The tracking URI where the MLFlow server runs\n", 59 | "- The experiment name as the logical entity to keep our tests grouped and organized.\n", 60 | "\n", 61 | "If you examine the SageMaker execution role of the `mlflow-admin`, you will note that it has an in-line policy attached called `restApiAdmin` granting admin permissions on all resources and methods on the REST API Gateway shielding MLflow and it looks like the following:\n", 62 | "\n", 63 | "```json\n", 64 | "{\n", 65 | " \"Version\": \"2012-10-17\",\n", 66 | " \"Statement\": [\n", 67 | " {\n", 68 | " \"Action\": \"execute-api:Invoke\",\n", 69 | " \"Resource\": \"arn:aws:execute-api:::/*/*/*\",\n", 70 | " \"Effect\": \"Allow\"\n", 71 | " }\n", 72 | " ]\n", 73 | "}\n", 74 | "```" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "tags": [] 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "import os\n", 86 | "import pandas as pd\n", 87 | "import json\n", 88 | "import random\n", 89 | "import boto3\n", 90 | "import logging\n", 91 | "\n", 92 | "## SageMaker and SKlearn libraries\n", 93 | "import sagemaker\n", 94 | "\n", 95 | "## MLFlow libraries\n", 96 | "import mlflow\n", 97 | "from mlflow.tracking.client import MlflowClient\n", 98 | "import mlflow.sagemaker\n", 99 | "\n", 100 | "logging.getLogger(\"mlflow\").setLevel(logging.INFO)\n", 101 | "\n", 
102 | "ssm = boto3.client('ssm')\n", 103 | "\n", 104 | "sess = sagemaker.Session()\n", 105 | "bucket = sess.default_bucket()\n", 106 | "region = sess.boto_region_name\n", 107 | "tracking_uri = ssm.get_parameter(Name=\"mlflow-restApiUrl\")['Parameter']['Value']\n", 108 | "api_gw_id = tracking_uri.split('//')[1].split('.')[0]\n", 109 | "experiment_name = 'DEMO-sigv4'\n", 110 | "model_name = 'california-housing-model'\n", 111 | "\n", 112 | "print(\"Tracking URI: {}\".format(tracking_uri))\n", 113 | "print('bucket: {}'.format(bucket))\n", 114 | "print(\"Using AWS Region: {}\".format(region))\n", 115 | "print(\"MLflow server URI: {}\".format(tracking_uri))" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "## Test MLFlow server accessibility" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### Without using SigV4 (no env variable set) - should fail\n", 130 | "\n", 131 | "Uncomment this cell below to try the MLflow SDK without the environmental variable `MLFLOW_TRACKING_AWS_SIGV4` set and verify you cannot interact with the MLflow server." 
132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "tags": [] 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "# try:\n", 143 | "# del os.environ['MLFLOW_TRACKING_AWS_SIGV4']\n", 144 | "# except:\n", 145 | "# print('env variable not there')\n", 146 | "# mlflow.set_tracking_uri(tracking_uri)\n", 147 | "# mlflow.set_experiment(experiment_name)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "### With env variable set: should succeed if the SageMaker execution role has permission to call the MLFlow endpoint" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "tags": [] 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "os.environ['MLFLOW_TRACKING_AWS_SIGV4'] = \"True\"\n", 166 | "mlflow.set_tracking_uri(tracking_uri)\n", 167 | "mlflow.set_experiment(experiment_name)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## MLFlow Server access" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "tags": [] 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "!python -m requests_auth_aws_sigv4 https://{api_gw_id}.execute-api.{region}.amazonaws.com/prod/api/2.0/mlflow/experiments/get?experiment_id=0 -v" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "## MLflow Gateway access" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": { 199 | "tags": [] 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "!python -m requests_auth_aws_sigv4 https://{api_gw_id}.execute-api.{region}.amazonaws.com/prod/api/2.0/endpoints/ -v" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "# MLflow Gateway AI" 211 | ] 212 | }, 213 | { 214 | "cell_type": 
"markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "## Client API" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "tags": [] 225 | }, 226 | "outputs": [], 227 | "source": [ 228 | "from mlflow.deployments import get_deploy_client\n", 229 | "\n", 230 | "client = get_deploy_client(tracking_uri)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### List all routes" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "tags": [] 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "endpoints = client.list_endpoints()\n", 249 | "for endpoint in endpoints:\n", 250 | " print(endpoints)\n", 251 | "endpoint_name = endpoint.name" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "### Query a route" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "tags": [] 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "import json\n", 270 | "\n", 271 | "response = client.predict(\n", 272 | " endpoint=endpoint_name, inputs={'prompt':'Tell me a funny story about a fish'}\n", 273 | ")\n", 274 | "\n", 275 | "json_formatted = json.dumps(response, indent=1)\n", 276 | "print(json_formatted)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "## Langchain" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "tags": [] 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "import mlflow\n", 295 | "from langchain import LLMChain, PromptTemplate\n", 296 | "from langchain.llms import Mlflow\n", 297 | "\n", 298 | "gateway = Mlflow(\n", 299 | " target_uri=tracking_uri,\n", 300 | " endpoint=endpoint_name\n", 301 | ")\n", 302 | "\n", 303 | "llm_chain = LLMChain(\n", 304 | " llm=gateway,\n", 305 | " 
prompt=PromptTemplate(\n", 306 | " input_variables=[\"adjective\"],\n", 307 | " template=\"Tell me a {adjective} joke\",\n", 308 | " ),\n", 309 | ")\n", 310 | "result = llm_chain.run(adjective=\"funny\")\n", 311 | "print(result)\n", 312 | "\n", 313 | "with mlflow.start_run():\n", 314 | " model_info = mlflow.langchain.log_model(llm_chain, \"model\")\n", 315 | "\n", 316 | "model = mlflow.pyfunc.load_model(model_info.model_uri)\n", 317 | "print(model.predict([{\"adjective\": \"funny\"}]))" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [] 326 | } 327 | ], 328 | "metadata": { 329 | "availableInstances": [ 330 | { 331 | "_defaultOrder": 0, 332 | "_isFastLaunch": true, 333 | "category": "General purpose", 334 | "gpuNum": 0, 335 | "hideHardwareSpecs": false, 336 | "memoryGiB": 4, 337 | "name": "ml.t3.medium", 338 | "vcpuNum": 2 339 | }, 340 | { 341 | "_defaultOrder": 1, 342 | "_isFastLaunch": false, 343 | "category": "General purpose", 344 | "gpuNum": 0, 345 | "hideHardwareSpecs": false, 346 | "memoryGiB": 8, 347 | "name": "ml.t3.large", 348 | "vcpuNum": 2 349 | }, 350 | { 351 | "_defaultOrder": 2, 352 | "_isFastLaunch": false, 353 | "category": "General purpose", 354 | "gpuNum": 0, 355 | "hideHardwareSpecs": false, 356 | "memoryGiB": 16, 357 | "name": "ml.t3.xlarge", 358 | "vcpuNum": 4 359 | }, 360 | { 361 | "_defaultOrder": 3, 362 | "_isFastLaunch": false, 363 | "category": "General purpose", 364 | "gpuNum": 0, 365 | "hideHardwareSpecs": false, 366 | "memoryGiB": 32, 367 | "name": "ml.t3.2xlarge", 368 | "vcpuNum": 8 369 | }, 370 | { 371 | "_defaultOrder": 4, 372 | "_isFastLaunch": true, 373 | "category": "General purpose", 374 | "gpuNum": 0, 375 | "hideHardwareSpecs": false, 376 | "memoryGiB": 8, 377 | "name": "ml.m5.large", 378 | "vcpuNum": 2 379 | }, 380 | { 381 | "_defaultOrder": 5, 382 | "_isFastLaunch": false, 383 | "category": "General purpose", 384 | "gpuNum": 0, 385 | 
"hideHardwareSpecs": false, 386 | "memoryGiB": 16, 387 | "name": "ml.m5.xlarge", 388 | "vcpuNum": 4 389 | }, 390 | { 391 | "_defaultOrder": 6, 392 | "_isFastLaunch": false, 393 | "category": "General purpose", 394 | "gpuNum": 0, 395 | "hideHardwareSpecs": false, 396 | "memoryGiB": 32, 397 | "name": "ml.m5.2xlarge", 398 | "vcpuNum": 8 399 | }, 400 | { 401 | "_defaultOrder": 7, 402 | "_isFastLaunch": false, 403 | "category": "General purpose", 404 | "gpuNum": 0, 405 | "hideHardwareSpecs": false, 406 | "memoryGiB": 64, 407 | "name": "ml.m5.4xlarge", 408 | "vcpuNum": 16 409 | }, 410 | { 411 | "_defaultOrder": 8, 412 | "_isFastLaunch": false, 413 | "category": "General purpose", 414 | "gpuNum": 0, 415 | "hideHardwareSpecs": false, 416 | "memoryGiB": 128, 417 | "name": "ml.m5.8xlarge", 418 | "vcpuNum": 32 419 | }, 420 | { 421 | "_defaultOrder": 9, 422 | "_isFastLaunch": false, 423 | "category": "General purpose", 424 | "gpuNum": 0, 425 | "hideHardwareSpecs": false, 426 | "memoryGiB": 192, 427 | "name": "ml.m5.12xlarge", 428 | "vcpuNum": 48 429 | }, 430 | { 431 | "_defaultOrder": 10, 432 | "_isFastLaunch": false, 433 | "category": "General purpose", 434 | "gpuNum": 0, 435 | "hideHardwareSpecs": false, 436 | "memoryGiB": 256, 437 | "name": "ml.m5.16xlarge", 438 | "vcpuNum": 64 439 | }, 440 | { 441 | "_defaultOrder": 11, 442 | "_isFastLaunch": false, 443 | "category": "General purpose", 444 | "gpuNum": 0, 445 | "hideHardwareSpecs": false, 446 | "memoryGiB": 384, 447 | "name": "ml.m5.24xlarge", 448 | "vcpuNum": 96 449 | }, 450 | { 451 | "_defaultOrder": 12, 452 | "_isFastLaunch": false, 453 | "category": "General purpose", 454 | "gpuNum": 0, 455 | "hideHardwareSpecs": false, 456 | "memoryGiB": 8, 457 | "name": "ml.m5d.large", 458 | "vcpuNum": 2 459 | }, 460 | { 461 | "_defaultOrder": 13, 462 | "_isFastLaunch": false, 463 | "category": "General purpose", 464 | "gpuNum": 0, 465 | "hideHardwareSpecs": false, 466 | "memoryGiB": 16, 467 | "name": "ml.m5d.xlarge", 468 | "vcpuNum": 
4 469 | }, 470 | { 471 | "_defaultOrder": 14, 472 | "_isFastLaunch": false, 473 | "category": "General purpose", 474 | "gpuNum": 0, 475 | "hideHardwareSpecs": false, 476 | "memoryGiB": 32, 477 | "name": "ml.m5d.2xlarge", 478 | "vcpuNum": 8 479 | }, 480 | { 481 | "_defaultOrder": 15, 482 | "_isFastLaunch": false, 483 | "category": "General purpose", 484 | "gpuNum": 0, 485 | "hideHardwareSpecs": false, 486 | "memoryGiB": 64, 487 | "name": "ml.m5d.4xlarge", 488 | "vcpuNum": 16 489 | }, 490 | { 491 | "_defaultOrder": 16, 492 | "_isFastLaunch": false, 493 | "category": "General purpose", 494 | "gpuNum": 0, 495 | "hideHardwareSpecs": false, 496 | "memoryGiB": 128, 497 | "name": "ml.m5d.8xlarge", 498 | "vcpuNum": 32 499 | }, 500 | { 501 | "_defaultOrder": 17, 502 | "_isFastLaunch": false, 503 | "category": "General purpose", 504 | "gpuNum": 0, 505 | "hideHardwareSpecs": false, 506 | "memoryGiB": 192, 507 | "name": "ml.m5d.12xlarge", 508 | "vcpuNum": 48 509 | }, 510 | { 511 | "_defaultOrder": 18, 512 | "_isFastLaunch": false, 513 | "category": "General purpose", 514 | "gpuNum": 0, 515 | "hideHardwareSpecs": false, 516 | "memoryGiB": 256, 517 | "name": "ml.m5d.16xlarge", 518 | "vcpuNum": 64 519 | }, 520 | { 521 | "_defaultOrder": 19, 522 | "_isFastLaunch": false, 523 | "category": "General purpose", 524 | "gpuNum": 0, 525 | "hideHardwareSpecs": false, 526 | "memoryGiB": 384, 527 | "name": "ml.m5d.24xlarge", 528 | "vcpuNum": 96 529 | }, 530 | { 531 | "_defaultOrder": 20, 532 | "_isFastLaunch": false, 533 | "category": "General purpose", 534 | "gpuNum": 0, 535 | "hideHardwareSpecs": true, 536 | "memoryGiB": 0, 537 | "name": "ml.geospatial.interactive", 538 | "supportedImageNames": [ 539 | "sagemaker-geospatial-v1-0" 540 | ], 541 | "vcpuNum": 0 542 | }, 543 | { 544 | "_defaultOrder": 21, 545 | "_isFastLaunch": true, 546 | "category": "Compute optimized", 547 | "gpuNum": 0, 548 | "hideHardwareSpecs": false, 549 | "memoryGiB": 4, 550 | "name": "ml.c5.large", 551 | "vcpuNum": 2 
552 | }, 553 | { 554 | "_defaultOrder": 22, 555 | "_isFastLaunch": false, 556 | "category": "Compute optimized", 557 | "gpuNum": 0, 558 | "hideHardwareSpecs": false, 559 | "memoryGiB": 8, 560 | "name": "ml.c5.xlarge", 561 | "vcpuNum": 4 562 | }, 563 | { 564 | "_defaultOrder": 23, 565 | "_isFastLaunch": false, 566 | "category": "Compute optimized", 567 | "gpuNum": 0, 568 | "hideHardwareSpecs": false, 569 | "memoryGiB": 16, 570 | "name": "ml.c5.2xlarge", 571 | "vcpuNum": 8 572 | }, 573 | { 574 | "_defaultOrder": 24, 575 | "_isFastLaunch": false, 576 | "category": "Compute optimized", 577 | "gpuNum": 0, 578 | "hideHardwareSpecs": false, 579 | "memoryGiB": 32, 580 | "name": "ml.c5.4xlarge", 581 | "vcpuNum": 16 582 | }, 583 | { 584 | "_defaultOrder": 25, 585 | "_isFastLaunch": false, 586 | "category": "Compute optimized", 587 | "gpuNum": 0, 588 | "hideHardwareSpecs": false, 589 | "memoryGiB": 72, 590 | "name": "ml.c5.9xlarge", 591 | "vcpuNum": 36 592 | }, 593 | { 594 | "_defaultOrder": 26, 595 | "_isFastLaunch": false, 596 | "category": "Compute optimized", 597 | "gpuNum": 0, 598 | "hideHardwareSpecs": false, 599 | "memoryGiB": 96, 600 | "name": "ml.c5.12xlarge", 601 | "vcpuNum": 48 602 | }, 603 | { 604 | "_defaultOrder": 27, 605 | "_isFastLaunch": false, 606 | "category": "Compute optimized", 607 | "gpuNum": 0, 608 | "hideHardwareSpecs": false, 609 | "memoryGiB": 144, 610 | "name": "ml.c5.18xlarge", 611 | "vcpuNum": 72 612 | }, 613 | { 614 | "_defaultOrder": 28, 615 | "_isFastLaunch": false, 616 | "category": "Compute optimized", 617 | "gpuNum": 0, 618 | "hideHardwareSpecs": false, 619 | "memoryGiB": 192, 620 | "name": "ml.c5.24xlarge", 621 | "vcpuNum": 96 622 | }, 623 | { 624 | "_defaultOrder": 29, 625 | "_isFastLaunch": true, 626 | "category": "Accelerated computing", 627 | "gpuNum": 1, 628 | "hideHardwareSpecs": false, 629 | "memoryGiB": 16, 630 | "name": "ml.g4dn.xlarge", 631 | "vcpuNum": 4 632 | }, 633 | { 634 | "_defaultOrder": 30, 635 | "_isFastLaunch": false, 
636 | "category": "Accelerated computing", 637 | "gpuNum": 1, 638 | "hideHardwareSpecs": false, 639 | "memoryGiB": 32, 640 | "name": "ml.g4dn.2xlarge", 641 | "vcpuNum": 8 642 | }, 643 | { 644 | "_defaultOrder": 31, 645 | "_isFastLaunch": false, 646 | "category": "Accelerated computing", 647 | "gpuNum": 1, 648 | "hideHardwareSpecs": false, 649 | "memoryGiB": 64, 650 | "name": "ml.g4dn.4xlarge", 651 | "vcpuNum": 16 652 | }, 653 | { 654 | "_defaultOrder": 32, 655 | "_isFastLaunch": false, 656 | "category": "Accelerated computing", 657 | "gpuNum": 1, 658 | "hideHardwareSpecs": false, 659 | "memoryGiB": 128, 660 | "name": "ml.g4dn.8xlarge", 661 | "vcpuNum": 32 662 | }, 663 | { 664 | "_defaultOrder": 33, 665 | "_isFastLaunch": false, 666 | "category": "Accelerated computing", 667 | "gpuNum": 4, 668 | "hideHardwareSpecs": false, 669 | "memoryGiB": 192, 670 | "name": "ml.g4dn.12xlarge", 671 | "vcpuNum": 48 672 | }, 673 | { 674 | "_defaultOrder": 34, 675 | "_isFastLaunch": false, 676 | "category": "Accelerated computing", 677 | "gpuNum": 1, 678 | "hideHardwareSpecs": false, 679 | "memoryGiB": 256, 680 | "name": "ml.g4dn.16xlarge", 681 | "vcpuNum": 64 682 | }, 683 | { 684 | "_defaultOrder": 35, 685 | "_isFastLaunch": false, 686 | "category": "Accelerated computing", 687 | "gpuNum": 1, 688 | "hideHardwareSpecs": false, 689 | "memoryGiB": 61, 690 | "name": "ml.p3.2xlarge", 691 | "vcpuNum": 8 692 | }, 693 | { 694 | "_defaultOrder": 36, 695 | "_isFastLaunch": false, 696 | "category": "Accelerated computing", 697 | "gpuNum": 4, 698 | "hideHardwareSpecs": false, 699 | "memoryGiB": 244, 700 | "name": "ml.p3.8xlarge", 701 | "vcpuNum": 32 702 | }, 703 | { 704 | "_defaultOrder": 37, 705 | "_isFastLaunch": false, 706 | "category": "Accelerated computing", 707 | "gpuNum": 8, 708 | "hideHardwareSpecs": false, 709 | "memoryGiB": 488, 710 | "name": "ml.p3.16xlarge", 711 | "vcpuNum": 64 712 | }, 713 | { 714 | "_defaultOrder": 38, 715 | "_isFastLaunch": false, 716 | "category": "Accelerated 
computing", 717 | "gpuNum": 8, 718 | "hideHardwareSpecs": false, 719 | "memoryGiB": 768, 720 | "name": "ml.p3dn.24xlarge", 721 | "vcpuNum": 96 722 | }, 723 | { 724 | "_defaultOrder": 39, 725 | "_isFastLaunch": false, 726 | "category": "Memory Optimized", 727 | "gpuNum": 0, 728 | "hideHardwareSpecs": false, 729 | "memoryGiB": 16, 730 | "name": "ml.r5.large", 731 | "vcpuNum": 2 732 | }, 733 | { 734 | "_defaultOrder": 40, 735 | "_isFastLaunch": false, 736 | "category": "Memory Optimized", 737 | "gpuNum": 0, 738 | "hideHardwareSpecs": false, 739 | "memoryGiB": 32, 740 | "name": "ml.r5.xlarge", 741 | "vcpuNum": 4 742 | }, 743 | { 744 | "_defaultOrder": 41, 745 | "_isFastLaunch": false, 746 | "category": "Memory Optimized", 747 | "gpuNum": 0, 748 | "hideHardwareSpecs": false, 749 | "memoryGiB": 64, 750 | "name": "ml.r5.2xlarge", 751 | "vcpuNum": 8 752 | }, 753 | { 754 | "_defaultOrder": 42, 755 | "_isFastLaunch": false, 756 | "category": "Memory Optimized", 757 | "gpuNum": 0, 758 | "hideHardwareSpecs": false, 759 | "memoryGiB": 128, 760 | "name": "ml.r5.4xlarge", 761 | "vcpuNum": 16 762 | }, 763 | { 764 | "_defaultOrder": 43, 765 | "_isFastLaunch": false, 766 | "category": "Memory Optimized", 767 | "gpuNum": 0, 768 | "hideHardwareSpecs": false, 769 | "memoryGiB": 256, 770 | "name": "ml.r5.8xlarge", 771 | "vcpuNum": 32 772 | }, 773 | { 774 | "_defaultOrder": 44, 775 | "_isFastLaunch": false, 776 | "category": "Memory Optimized", 777 | "gpuNum": 0, 778 | "hideHardwareSpecs": false, 779 | "memoryGiB": 384, 780 | "name": "ml.r5.12xlarge", 781 | "vcpuNum": 48 782 | }, 783 | { 784 | "_defaultOrder": 45, 785 | "_isFastLaunch": false, 786 | "category": "Memory Optimized", 787 | "gpuNum": 0, 788 | "hideHardwareSpecs": false, 789 | "memoryGiB": 512, 790 | "name": "ml.r5.16xlarge", 791 | "vcpuNum": 64 792 | }, 793 | { 794 | "_defaultOrder": 46, 795 | "_isFastLaunch": false, 796 | "category": "Memory Optimized", 797 | "gpuNum": 0, 798 | "hideHardwareSpecs": false, 799 | "memoryGiB": 
768, 800 | "name": "ml.r5.24xlarge", 801 | "vcpuNum": 96 802 | }, 803 | { 804 | "_defaultOrder": 47, 805 | "_isFastLaunch": false, 806 | "category": "Accelerated computing", 807 | "gpuNum": 1, 808 | "hideHardwareSpecs": false, 809 | "memoryGiB": 16, 810 | "name": "ml.g5.xlarge", 811 | "vcpuNum": 4 812 | }, 813 | { 814 | "_defaultOrder": 48, 815 | "_isFastLaunch": false, 816 | "category": "Accelerated computing", 817 | "gpuNum": 1, 818 | "hideHardwareSpecs": false, 819 | "memoryGiB": 32, 820 | "name": "ml.g5.2xlarge", 821 | "vcpuNum": 8 822 | }, 823 | { 824 | "_defaultOrder": 49, 825 | "_isFastLaunch": false, 826 | "category": "Accelerated computing", 827 | "gpuNum": 1, 828 | "hideHardwareSpecs": false, 829 | "memoryGiB": 64, 830 | "name": "ml.g5.4xlarge", 831 | "vcpuNum": 16 832 | }, 833 | { 834 | "_defaultOrder": 50, 835 | "_isFastLaunch": false, 836 | "category": "Accelerated computing", 837 | "gpuNum": 1, 838 | "hideHardwareSpecs": false, 839 | "memoryGiB": 128, 840 | "name": "ml.g5.8xlarge", 841 | "vcpuNum": 32 842 | }, 843 | { 844 | "_defaultOrder": 51, 845 | "_isFastLaunch": false, 846 | "category": "Accelerated computing", 847 | "gpuNum": 1, 848 | "hideHardwareSpecs": false, 849 | "memoryGiB": 256, 850 | "name": "ml.g5.16xlarge", 851 | "vcpuNum": 64 852 | }, 853 | { 854 | "_defaultOrder": 52, 855 | "_isFastLaunch": false, 856 | "category": "Accelerated computing", 857 | "gpuNum": 4, 858 | "hideHardwareSpecs": false, 859 | "memoryGiB": 192, 860 | "name": "ml.g5.12xlarge", 861 | "vcpuNum": 48 862 | }, 863 | { 864 | "_defaultOrder": 53, 865 | "_isFastLaunch": false, 866 | "category": "Accelerated computing", 867 | "gpuNum": 4, 868 | "hideHardwareSpecs": false, 869 | "memoryGiB": 384, 870 | "name": "ml.g5.24xlarge", 871 | "vcpuNum": 96 872 | }, 873 | { 874 | "_defaultOrder": 54, 875 | "_isFastLaunch": false, 876 | "category": "Accelerated computing", 877 | "gpuNum": 8, 878 | "hideHardwareSpecs": false, 879 | "memoryGiB": 768, 880 | "name": "ml.g5.48xlarge", 881 
| "vcpuNum": 192 882 | }, 883 | { 884 | "_defaultOrder": 55, 885 | "_isFastLaunch": false, 886 | "category": "Accelerated computing", 887 | "gpuNum": 8, 888 | "hideHardwareSpecs": false, 889 | "memoryGiB": 1152, 890 | "name": "ml.p4d.24xlarge", 891 | "vcpuNum": 96 892 | }, 893 | { 894 | "_defaultOrder": 56, 895 | "_isFastLaunch": false, 896 | "category": "Accelerated computing", 897 | "gpuNum": 8, 898 | "hideHardwareSpecs": false, 899 | "memoryGiB": 1152, 900 | "name": "ml.p4de.24xlarge", 901 | "vcpuNum": 96 902 | }, 903 | { 904 | "_defaultOrder": 57, 905 | "_isFastLaunch": false, 906 | "category": "Accelerated computing", 907 | "gpuNum": 0, 908 | "hideHardwareSpecs": false, 909 | "memoryGiB": 32, 910 | "name": "ml.trn1.2xlarge", 911 | "vcpuNum": 8 912 | }, 913 | { 914 | "_defaultOrder": 58, 915 | "_isFastLaunch": false, 916 | "category": "Accelerated computing", 917 | "gpuNum": 0, 918 | "hideHardwareSpecs": false, 919 | "memoryGiB": 512, 920 | "name": "ml.trn1.32xlarge", 921 | "vcpuNum": 128 922 | }, 923 | { 924 | "_defaultOrder": 59, 925 | "_isFastLaunch": false, 926 | "category": "Accelerated computing", 927 | "gpuNum": 0, 928 | "hideHardwareSpecs": false, 929 | "memoryGiB": 512, 930 | "name": "ml.trn1n.32xlarge", 931 | "vcpuNum": 128 932 | } 933 | ], 934 | "instance_type": "ml.t3.medium", 935 | "interpreter": { 936 | "hash": "04ffa0b675ec4736afd1210dd81a6f70b0b4fa83298b056bd6b4e16ede0b389c" 937 | }, 938 | "kernelspec": { 939 | "display_name": "Python 3 (Base Python 2.0)", 940 | "language": "python", 941 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-central-1:936697816551:image/sagemaker-base-python-38" 942 | }, 943 | "language_info": { 944 | "codemirror_mode": { 945 | "name": "ipython", 946 | "version": 3 947 | }, 948 | "file_extension": ".py", 949 | "mimetype": "text/x-python", 950 | "name": "python", 951 | "nbconvert_exporter": "python", 952 | "pygments_lexer": "ipython3", 953 | "version": "3.8.12" 954 | } 955 | }, 956 | "nbformat": 4, 957 | 
"nbformat_minor": 4 958 | } 959 | -------------------------------------------------------------------------------- /lab/source_dir/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow==2.12.2 2 | requests_auth_aws_sigv4 3 | matplotlib 4 | sagemaker-experiments 5 | -------------------------------------------------------------------------------- /lab/source_dir/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | from setuptools import setup, find_packages 5 | 6 | setup(name='sagemaker-example', 7 | version='1.0', 8 | description='SageMaker MLFlow Example.', 9 | author='Paolo', 10 | author_email='frpaolo@amazon.at', 11 | packages=find_packages(exclude=('tests', 'docs'))) -------------------------------------------------------------------------------- /lab/source_dir/train.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | 4 | import os 5 | import logging 6 | import argparse 7 | import numpy as np 8 | import pandas as pd 9 | from sklearn.ensemble import RandomForestRegressor 10 | 11 | import mlflow 12 | import mlflow.sklearn 13 | from mlflow.tracking import MlflowClient 14 | 15 | import joblib 16 | import boto3 17 | import json 18 | import time 19 | 20 | from smexperiments.tracker import Tracker 21 | 22 | logging.basicConfig(level=logging.INFO) 23 | logging.getLogger("mlflow").setLevel(logging.INFO) 24 | 25 | tracking_uri = os.environ.get('MLFLOW_TRACKING_URI') 26 | experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') 27 | mlflow_amplify_ui = os.environ.get('MLFLOW_AMPLIFY_UI_URI') 28 | user = os.environ.get('MLFLOW_USER') 29 | 30 | def print_auto_logged_info(r): 31 | tags = {k: v for k, v in r.data.tags.items()} 32 | artifacts = [f.path for f in MlflowClient().list_artifacts(r.info.run_id, "model")] 33 | print("run_id: {}".format(r.info.run_id)) 34 | print("artifacts: {}".format(artifacts)) 35 | print("params: {}".format(r.data.params)) 36 | print("metrics: {}".format(r.data.metrics)) 37 | #print("tags: {}".format(tags)) 38 | 39 | if __name__ =='__main__': 40 | parser = argparse.ArgumentParser() 41 | # hyperparameters sent by the client are passed as command-line arguments to the script. 
42 | # to simplify the demo we don't use all sklearn RandomForest hyperparameters 43 | parser.add_argument('--n-estimators', type=int, default=10) 44 | parser.add_argument('--min-samples-leaf', type=int, default=3) 45 | 46 | # Data, model, and output directories 47 | parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) 48 | parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) 49 | parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST')) 50 | parser.add_argument('--train-file', type=str, default='california_train.csv') 51 | parser.add_argument('--test-file', type=str, default='california_test.csv') 52 | parser.add_argument('--user', type=str, default='sagemaker') 53 | parser.add_argument('--features', type=str) # we ask user to explicitly name features 54 | parser.add_argument('--target', type=str) # we ask user to explicitly name the target 55 | 56 | args, _ = parser.parse_known_args() 57 | 58 | logging.info('reading data') 59 | train_df = pd.read_csv(os.path.join(args.train, args.train_file)) 60 | test_df = pd.read_csv(os.path.join(args.test, args.test_file)) 61 | 62 | logging.info('building training and testing datasets') 63 | X_train = train_df[args.features.split()] 64 | X_test = test_df[args.features.split()] 65 | y_train = train_df[args.target] 66 | y_test = test_df[args.target] 67 | 68 | region = os.environ.get('AWS_DEFAULT_REGION') 69 | 70 | # set remote mlflow server 71 | mlflow.set_tracking_uri(tracking_uri) 72 | experiment = mlflow.set_experiment(experiment_name) 73 | 74 | mlflow.autolog() 75 | 76 | with mlflow.start_run() as run: 77 | params = { 78 | "n-estimators": args.n_estimators, 79 | "min-samples-leaf": args.min_samples_leaf, 80 | "features": args.features 81 | } 82 | mlflow.log_params(params) 83 | 84 | # TRAIN 85 | logging.info('training model') 86 | model = RandomForestRegressor( 87 | n_estimators=args.n_estimators, 88 | 
min_samples_leaf=args.min_samples_leaf, 89 | n_jobs=-1 90 | ) 91 | 92 | model.fit(X_train, y_train) 93 | 94 | # ABS ERROR AND LOG COUPLE PERF METRICS 95 | logging.info('evaluating model') 96 | abs_err = np.abs(model.predict(X_test) - y_test) 97 | 98 | for q in [10, 50, 90]: 99 | logging.info(f'AE-at-{q}th-percentile: {np.percentile(a=abs_err, q=q)}') 100 | mlflow.log_metric(f'AE-at-{str(q)}th-percentile', np.percentile(a=abs_err, q=q)) 101 | 102 | # SAVE MODEL 103 | logging.info('saving model in MLflow') 104 | mlflow.sklearn.log_model(model, "model") 105 | 106 | sm_data = json.loads(os.environ.get('SM_TRAINING_ENV')) 107 | job_name = sm_data['job_name'] 108 | 109 | sm_client = boto3.client('sagemaker') 110 | training_job_details = sm_client.describe_training_job(TrainingJobName=job_name) 111 | input_data_config = training_job_details['InputDataConfig'] 112 | # Shovel info about the input data 113 | input_data = {} 114 | for item in input_data_config: 115 | input_data[item['ChannelName']] = item['DataSource']['S3DataSource']['S3Uri'] 116 | mlflow.set_tags(input_data) 117 | 118 | # Overwrite system tags 119 | mlflow.set_tags( 120 | { 121 | 'mlflow.source.name': f"https://{region}.console.aws.amazon.com/sagemaker/home?region={region}#/jobs/{job_name}", 122 | 'mlflow.source.type': 'JOB', 123 | 'mlflow.user': user 124 | } 125 | ) 126 | # Shovel all SageMaker related data into mlflow 127 | mlflow.set_tags(sm_data) 128 | 129 | run_id = run.info.run_id 130 | experiment_id = experiment.experiment_id 131 | 132 | r = mlflow.get_run(run_id=run_id) 133 | print_auto_logged_info(r) 134 | 135 | artifacts = [f.path for f in MlflowClient().list_artifacts(r.info.run_id, "model")] 136 | 137 | tracker_parameters = { 138 | "run_id": run_id, 139 | "experiment_id": experiment_id, 140 | "mlflow-run-url": f"{mlflow_amplify_ui}/#/experiments/{experiment_id}/runs/{run_id}" 141 | } 142 | try: 143 | with Tracker.load() as tracker: 144 | tracker.log_parameters(tracker_parameters) 145 | 
tracker.log_parameters(r.data.params) 146 | for metric_name, value in r.data.metrics.items(): 147 | tracker.log_metric(metric_name=metric_name, value=value) 148 | for artifact in artifacts: 149 | tracker.log_output(name=f"MLFlow.{artifact}", value=f"{r.info.artifact_uri}/{artifact}") 150 | # Nullify default SageMaker.ModelArtifact 151 | tracker.log_output(name="SageMaker.ModelArtifact", value="NA") 152 | print("Loaded existing tracker") 153 | except: 154 | print("Could not load tracker (likely running in local mode). Create a new one") 155 | create_date = time.strftime("%Y-%m-%d-%H-%M-%S") 156 | tracker_name = f"mlflow-tracker-{create_date}" 157 | with Tracker.create(display_name=tracker_name) as tracker: 158 | tracker.log_parameters(tracker_parameters) 159 | tracker.log_parameters(r.data.params) 160 | print("Metric cannot be logged when creating a tracker in this way") 161 | for artifact in artifacts: 162 | tracker.log_output(name=f"MLFlow.{artifact}", value=f"{r.info.artifact_uri}/{artifact}") 163 | tracker.log_output(name="SageMaker.ModelArtifact", value="NA") -------------------------------------------------------------------------------- /resize-cloud9.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify the desired volume size in GiB as a command line argument. If not specified, default to 20 GiB. 4 | SIZE=${1:-20} 5 | 6 | # Get the ID of the environment host Amazon EC2 instance. 7 | INSTANCEID=$(curl http://169.254.169.254/latest/meta-data/instance-id) 8 | REGION=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone | sed 's/\(.*\)[a-z]/\1/') 9 | 10 | # Get the ID of the Amazon EBS volume associated with the instance. 11 | VOLUMEID=$(aws ec2 describe-instances \ 12 | --instance-id $INSTANCEID \ 13 | --query "Reservations[0].Instances[0].BlockDeviceMappings[0].Ebs.VolumeId" \ 14 | --output text \ 15 | --region $REGION) 16 | 17 | # Resize the EBS volume. 
18 | aws ec2 modify-volume --volume-id $VOLUMEID --size $SIZE 19 | 20 | # Wait for the resize to finish. 21 | while [ \ 22 | "$(aws ec2 describe-volumes-modifications \ 23 | --volume-id $VOLUMEID \ 24 | --filters Name=modification-state,Values="optimizing","completed" \ 25 | --query "length(VolumesModifications)"\ 26 | --output text)" != "1" ]; do 27 | sleep 1 28 | done 29 | 30 | #Check if we're on an NVMe filesystem 31 | if [[ -e "/dev/xvda" && $(readlink -f /dev/xvda) = "/dev/xvda" ]] 32 | then 33 | # Rewrite the partition table so that the partition takes up all the space that it can. 34 | sudo growpart /dev/xvda 1 35 | 36 | # Expand the size of the file system. 37 | # Check if we're on AL2 38 | STR=$(cat /etc/os-release) 39 | SUB="VERSION_ID=\"2\"" 40 | if [[ "$STR" == *"$SUB"* ]] 41 | then 42 | sudo xfs_growfs -d / 43 | else 44 | sudo resize2fs /dev/xvda1 45 | fi 46 | 47 | else 48 | # Rewrite the partition table so that the partition takes up all the space that it can. 49 | sudo growpart /dev/nvme0n1 1 50 | 51 | # Expand the size of the file system. 
52 | # Check if we're on AL2 53 | STR=$(cat /etc/os-release) 54 | SUB="VERSION_ID=\"2\"" 55 | if [[ "$STR" == *"$SUB"* ]] 56 | then 57 | sudo xfs_growfs -d / 58 | else 59 | sudo resize2fs /dev/nvme0n1p1 60 | fi 61 | fi 62 | -------------------------------------------------------------------------------- /src/cognito/add_users_and_groups.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | import getpass 4 | 5 | cognito_client = boto3.client('cognito-idp') 6 | user_pools = cognito_client.list_user_pools(MaxResults=60)['UserPools'] 7 | user_pool_id = [user_pool['Id'] for user_pool in user_pools if user_pool['Name']=='mlflow-user-pool'][0] 8 | groups = ['admins', 'readers', 'model-approvers'] 9 | list_groups = cognito_client.list_groups(UserPoolId=user_pool_id)['Groups'] 10 | existing_group_names = [group['GroupName'] for group in list_groups] 11 | users_groups = [ 12 | { 13 | 'username': 'mlflow-admin@example.com', 14 | 'group': 'admins' 15 | }, 16 | { 17 | 'username': 'mlflow-reader@example.com', 18 | 'group': 'readers', 19 | }, 20 | { 21 | 'username': 'mlflow-model-approver@example.com', 22 | 'group': 'model-approvers' 23 | } 24 | ] 25 | list_users = cognito_client.list_users(UserPoolId=user_pool_id)['Users'] 26 | 27 | existing_email_list = [] 28 | for user in list_users: 29 | attributes = user['Attributes'] 30 | email = [attribute['Value'] for attribute in attributes if attribute['Name']=='email'][0] 31 | existing_email_list.append(email) 32 | 33 | 34 | if __name__=="__main__": 35 | # Create groups 36 | for group in groups: 37 | if group in existing_group_names: 38 | print(f"group {group} already exists") 39 | else: 40 | print(f"create group {group} for cognito user pool {user_pool_id}") 41 | cognito_client.create_group( 42 | GroupName=group, 43 | UserPoolId=user_pool_id 44 | ) 45 | # Create users and associate them with a group 46 | for user_group in users_groups: 47 | username = 
user_group['username'] 48 | group = user_group['group'] 49 | if username in existing_email_list: 50 | print(f"user {username} already exist. skip it") 51 | else: 52 | print(f"create user {username}") 53 | cognito_client.admin_create_user( 54 | UserPoolId=user_pool_id, 55 | Username=username, 56 | #TemporaryPassword=args.password 57 | ) 58 | pwd = getpass.getpass(prompt = f"Enter the password for {username}: ") 59 | cognito_client.admin_set_user_password( 60 | UserPoolId=user_pool_id, 61 | Username=username, 62 | Password=pwd, 63 | Permanent=True # does not force a user to change the password 64 | ) 65 | 66 | print(f"add user {username} to group {group}") 67 | cognito_client.admin_add_user_to_group( 68 | UserPoolId=user_pool_id, 69 | Username=username, 70 | GroupName=group 71 | ) -------------------------------------------------------------------------------- /src/mlflow-gateway/.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | npm-debug.log -------------------------------------------------------------------------------- /src/mlflow-gateway/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.0 2 | 3 | ARG PORT=5001 4 | 5 | RUN pip install \ 6 | mlflow[genai]==2.12.2\ 7 | boto3 8 | 9 | WORKDIR /mlflow/ 10 | 11 | COPY config.yaml ./ 12 | 13 | EXPOSE ${PORT} 14 | 15 | CMD mlflow deployments start-server \ 16 | --host 0.0.0.0 \ 17 | --port ${PORT} \ 18 | --config-path config.yaml \ 19 | --workers ${WORKERS} -------------------------------------------------------------------------------- /src/mlflow-gateway/config.yaml: -------------------------------------------------------------------------------- 1 | endpoints: 2 | - name: completions 3 | endpoint_type: llm/v1/completions 4 | model: 5 | provider: bedrock 6 | name: anthropic.claude-v2 7 | config: 8 | aws_config: 9 | aws_region: us-west-2 
-------------------------------------------------------------------------------- /src/mlflow-server/.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | npm-debug.log -------------------------------------------------------------------------------- /src/mlflow-server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.0 2 | 3 | ARG PORT=5000 4 | 5 | RUN pip install \ 6 | mlflow==2.12.2\ 7 | boto3 \ 8 | psycopg2==2.9.5 9 | 10 | WORKDIR /mlflow/ 11 | 12 | EXPOSE 5000 13 | 14 | CMD mlflow server \ 15 | --host 0.0.0.0 \ 16 | --port ${PORT} \ 17 | --default-artifact-root ${BUCKET} \ 18 | --gunicorn-opts "--log-level debug --timeout 180 --workers=2 --threads=4 --worker-class=gthread" \ 19 | --backend-store-uri postgresql+psycopg2://${USERNAME}:${PASSWORD}@${DBHOST}:${DBPORT}/${DATABASE} 20 | --------------------------------------------------------------------------------